mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
Merge pull request #394 from inchoate/update-docs-for-schema
This commit is contained in:
commit
61d08a5be8
@ -2,32 +2,31 @@
|
||||
Basic example of scraping pipeline using SmartScraper with schema
|
||||
"""
|
||||
|
||||
import os, json
|
||||
import json
|
||||
import os
|
||||
from typing import Dict
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel
|
||||
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
class Project(BaseModel):
|
||||
title: str
|
||||
description: str
|
||||
|
||||
|
||||
class Projects(BaseModel):
|
||||
Projects: Dict[str, Project]
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
@ -37,7 +36,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key":openai_key,
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-3.5-turbo",
|
||||
},
|
||||
"verbose": True,
|
||||
@ -51,8 +50,8 @@ graph_config = {
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=schema,
|
||||
config=graph_config
|
||||
schema=Projects,
|
||||
config=graph_config,
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
|
||||
@ -4,6 +4,9 @@ Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from typing import Dict
|
||||
|
||||
from pydantic import BaseModel
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
from langchain_community.llms import HuggingFaceEndpoint
|
||||
@ -13,22 +16,12 @@ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
class Project(BaseModel):
|
||||
title: str
|
||||
description: str
|
||||
|
||||
class Projects(BaseModel):
|
||||
Projects: Dict[str, Project]
|
||||
|
||||
## required environment variable in .env
|
||||
#HUGGINGFACEHUB_API_TOKEN
|
||||
@ -61,7 +54,7 @@ graph_config = {
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=schema,
|
||||
schema=Projects,
|
||||
config=graph_config
|
||||
)
|
||||
result = smart_scraper_graph.run()
|
||||
|
||||
@ -2,8 +2,13 @@
|
||||
Basic example of scraping pipeline using SmartScraper with schema
|
||||
"""
|
||||
|
||||
import os, json
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, List
|
||||
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel
|
||||
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
@ -13,22 +18,12 @@ load_dotenv()
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
class Project(BaseModel):
|
||||
title: str
|
||||
description: str
|
||||
|
||||
class Projects(BaseModel):
|
||||
Projects: Dict[str, Project]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
@ -60,7 +55,7 @@ smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=schema,
|
||||
schema=Projects,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
|
||||
@ -39,7 +39,7 @@ class AbstractGraph(ABC):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client,
|
||||
configured for generating embeddings.
|
||||
|
||||
@ -5,6 +5,8 @@ CSVScraperMultiGraph Module
|
||||
from copy import copy, deepcopy
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .base_graph import BaseGraph
|
||||
from .abstract_graph import AbstractGraph
|
||||
from .csv_scraper_graph import CSVScraperGraph
|
||||
@ -32,7 +34,7 @@ class CSVScraperMultiGraph(AbstractGraph):
|
||||
prompt (str): The user prompt to search the internet.
|
||||
source (List[str]): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (Optional[str]): The schema for the graph output.
|
||||
schema (Optional[BaseModel]): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> search_graph = CSVScraperMultiGraph(
|
||||
@ -42,7 +44,7 @@ class CSVScraperMultiGraph(AbstractGraph):
|
||||
>>> result = search_graph.run()
|
||||
"""
|
||||
|
||||
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
|
||||
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
|
||||
|
||||
self.max_results = config.get("max_results", 3)
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@ class DeepScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client,
|
||||
configured for generating embeddings.
|
||||
@ -45,7 +45,7 @@ class DeepScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> deep_scraper = DeepScraperGraph(
|
||||
|
||||
@ -23,7 +23,7 @@ class JSONScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client,
|
||||
configured for generating embeddings.
|
||||
@ -34,7 +34,7 @@ class JSONScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> json_scraper = JSONScraperGraph(
|
||||
|
||||
@ -33,7 +33,7 @@ class JSONScraperMultiGraph(AbstractGraph):
|
||||
prompt (str): The user prompt to search the internet.
|
||||
source (List[str]): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (Optional[str]): The schema for the graph output.
|
||||
schema (Optional[BaseModel]): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> search_graph = JSONScraperMultiGraph(
|
||||
|
||||
@ -29,7 +29,7 @@ class OmniScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client,
|
||||
configured for generating embeddings.
|
||||
@ -41,7 +41,7 @@ class OmniScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> omni_scraper = OmniScraperGraph(
|
||||
|
||||
@ -34,7 +34,7 @@ class OmniSearchGraph(AbstractGraph):
|
||||
Args:
|
||||
prompt (str): The user prompt to search the internet.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (Optional[str]): The schema for the graph output.
|
||||
schema (Optional[BaseModel]): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> omni_search_graph = OmniSearchGraph(
|
||||
|
||||
@ -26,7 +26,7 @@ class PDFScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client,
|
||||
configured for generating embeddings.
|
||||
@ -38,7 +38,7 @@ class PDFScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> pdf_scraper = PDFScraperGraph(
|
||||
|
||||
@ -34,7 +34,7 @@ class PdfScraperMultiGraph(AbstractGraph):
|
||||
prompt (str): The user prompt to search the internet.
|
||||
source (List[str]): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (Optional[str]): The schema for the graph output.
|
||||
schema (Optional[BaseModel]): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> search_graph = PdfScraperMultiGraph(
|
||||
|
||||
@ -23,7 +23,7 @@ class ScriptCreatorGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client,
|
||||
configured for generating embeddings.
|
||||
@ -36,7 +36,7 @@ class ScriptCreatorGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> script_creator = ScriptCreatorGraph(
|
||||
|
||||
@ -5,6 +5,8 @@ ScriptCreatorMultiGraph Module
|
||||
from copy import copy, deepcopy
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from .base_graph import BaseGraph
|
||||
from .abstract_graph import AbstractGraph
|
||||
from .script_creator_graph import ScriptCreatorGraph
|
||||
@ -30,7 +32,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
|
||||
prompt (str): The user prompt to search the internet.
|
||||
source (List[str]): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (Optional[str]): The schema for the graph output.
|
||||
schema (Optional[BaseModel]): The schema for the graph output.
|
||||
Example:
|
||||
>>> script_graph = ScriptCreatorMultiGraph(
|
||||
... "What is Chioggia famous for?",
|
||||
@ -41,7 +43,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
|
||||
>>> result = script_graph.run()
|
||||
"""
|
||||
|
||||
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
|
||||
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
|
||||
|
||||
self.max_results = config.get("max_results", 3)
|
||||
|
||||
|
||||
@ -33,7 +33,7 @@ class SearchGraph(AbstractGraph):
|
||||
Args:
|
||||
prompt (str): The user prompt to search the internet.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (Optional[str]): The schema for the graph output.
|
||||
schema (Optional[BaseModel]): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> search_graph = SearchGraph(
|
||||
|
||||
@ -26,7 +26,7 @@ class SmartScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client,
|
||||
configured for generating embeddings.
|
||||
@ -37,7 +37,7 @@ class SmartScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> smart_scraper = SmartScraperGraph(
|
||||
|
||||
@ -33,7 +33,7 @@ class SmartScraperMultiGraph(AbstractGraph):
|
||||
prompt (str): The user prompt to search the internet.
|
||||
source (List[str]): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (Optional[str]): The schema for the graph output.
|
||||
schema (Optional[BaseModel]): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> search_graph = SmartScraperMultiGraph(
|
||||
|
||||
@ -28,7 +28,7 @@ class SpeechGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
||||
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||
@ -39,7 +39,7 @@ class SpeechGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> speech_graph = SpeechGraph(
|
||||
|
||||
@ -24,7 +24,7 @@ class XMLScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
llm_model: An instance of a language model client, configured for generating answers.
|
||||
embedder_model: An instance of an embedding model client,
|
||||
configured for generating embeddings.
|
||||
@ -36,7 +36,7 @@ class XMLScraperGraph(AbstractGraph):
|
||||
prompt (str): The prompt for the graph.
|
||||
source (str): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (str): The schema for the graph output.
|
||||
schema (BaseModel): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> xml_scraper = XMLScraperGraph(
|
||||
|
||||
@ -34,7 +34,7 @@ class XMLScraperMultiGraph(AbstractGraph):
|
||||
prompt (str): The user prompt to search the internet.
|
||||
source (List[str]): The source of the graph.
|
||||
config (dict): Configuration parameters for the graph.
|
||||
schema (Optional[str]): The schema for the graph output.
|
||||
schema (Optional[BaseModel]): The schema for the graph output.
|
||||
|
||||
Example:
|
||||
>>> search_graph = XMLScraperMultiGraph(
|
||||
|
||||
Loading…
Reference in New Issue
Block a user