fix: updated for schema changes

docs: updated for schema changes
This commit is contained in:
Jason Vertrees 2024-06-18 12:36:50 -05:00
parent a8251bdb85
commit aedda44868
20 changed files with 73 additions and 82 deletions

View File

@ -2,32 +2,31 @@
Basic example of scraping pipeline using SmartScraper with schema Basic example of scraping pipeline using SmartScraper with schema
""" """
import os, json import json
import os
from typing import Dict
from dotenv import load_dotenv from dotenv import load_dotenv
from pydantic import BaseModel
from scrapegraphai.graphs import SmartScraperGraph from scrapegraphai.graphs import SmartScraperGraph
load_dotenv() load_dotenv()
# ************************************************ # ************************************************
# Define the output schema for the graph # Define the output schema for the graph
# ************************************************ # ************************************************
schema= """
{ class Project(BaseModel):
"Projects": [ title: str
"Project #": description: str
{
"title": "...",
"description": "...", class Projects(BaseModel):
}, Projects: Dict[str, Project]
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
# ************************************************ # ************************************************
# Define the configuration for the graph # Define the configuration for the graph
@ -37,7 +36,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
graph_config = { graph_config = {
"llm": { "llm": {
"api_key":openai_key, "api_key": openai_key,
"model": "gpt-3.5-turbo", "model": "gpt-3.5-turbo",
}, },
"verbose": True, "verbose": True,
@ -51,8 +50,8 @@ graph_config = {
smart_scraper_graph = SmartScraperGraph( smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description", prompt="List me all the projects with their description",
source="https://perinim.github.io/projects/", source="https://perinim.github.io/projects/",
schema=schema, schema=Projects,
config=graph_config config=graph_config,
) )
result = smart_scraper_graph.run() result = smart_scraper_graph.run()

View File

@ -4,6 +4,9 @@ Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key
import os import os
from dotenv import load_dotenv from dotenv import load_dotenv
from typing import Dict
from pydantic import BaseModel
from scrapegraphai.graphs import SmartScraperGraph from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info from scrapegraphai.utils import prettify_exec_info
from langchain_community.llms import HuggingFaceEndpoint from langchain_community.llms import HuggingFaceEndpoint
@ -13,22 +16,12 @@ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
# Define the output schema for the graph # Define the output schema for the graph
# ************************************************ # ************************************************
schema= """ class Project(BaseModel):
{ title: str
"Projects": [ description: str
"Project #":
{ class Projects(BaseModel):
"title": "...", Projects: Dict[str, Project]
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
## required environment variable in .env ## required environment variable in .env
#HUGGINGFACEHUB_API_TOKEN #HUGGINGFACEHUB_API_TOKEN
@ -61,7 +54,7 @@ graph_config = {
smart_scraper_graph = SmartScraperGraph( smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description", prompt="List me all the projects with their description",
source="https://perinim.github.io/projects/", source="https://perinim.github.io/projects/",
schema=schema, schema=Projects,
config=graph_config config=graph_config
) )
result = smart_scraper_graph.run() result = smart_scraper_graph.run()

View File

@ -2,8 +2,13 @@
Basic example of scraping pipeline using SmartScraper with schema Basic example of scraping pipeline using SmartScraper with schema
""" """
import os, json import json
import os
from typing import Dict, List
from dotenv import load_dotenv from dotenv import load_dotenv
from pydantic import BaseModel
from scrapegraphai.graphs import SmartScraperGraph from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info from scrapegraphai.utils import prettify_exec_info
@ -13,22 +18,12 @@ load_dotenv()
# Define the output schema for the graph # Define the output schema for the graph
# ************************************************ # ************************************************
schema= """ class Project(BaseModel):
{ title: str
"Projects": [ description: str
"Project #":
{ class Projects(BaseModel):
"title": "...", Projects: Dict[str, Project]
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
# ************************************************ # ************************************************
# Define the configuration for the graph # Define the configuration for the graph
@ -60,7 +55,7 @@ smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description.", prompt="List me all the projects with their description.",
# also accepts a string with the already downloaded HTML code # also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/", source="https://perinim.github.io/projects/",
schema=schema, schema=Projects,
config=graph_config config=graph_config
) )

View File

@ -39,7 +39,7 @@ class AbstractGraph(ABC):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.

View File

@ -5,6 +5,8 @@ CSVScraperMultiGraph Module
from copy import copy, deepcopy from copy import copy, deepcopy
from typing import List, Optional from typing import List, Optional
from pydantic import BaseModel
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph from .abstract_graph import AbstractGraph
from .csv_scraper_graph import CSVScraperGraph from .csv_scraper_graph import CSVScraperGraph
@ -32,7 +34,7 @@ class CSVScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph. source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output. schema (Optional[BaseModel]): The schema for the graph output.
Example: Example:
>>> search_graph = MultipleSearchGraph( >>> search_graph = MultipleSearchGraph(
@ -42,7 +44,7 @@ class CSVScraperMultiGraph(AbstractGraph):
>>> result = search_graph.run() >>> result = search_graph.run()
""" """
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None): def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
self.max_results = config.get("max_results", 3) self.max_results = config.get("max_results", 3)

View File

@ -34,7 +34,7 @@ class DeepScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -45,7 +45,7 @@ class DeepScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
Example: Example:
>>> deep_scraper = DeepScraperGraph( >>> deep_scraper = DeepScraperGraph(

View File

@ -23,7 +23,7 @@ class JSONScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -34,7 +34,7 @@ class JSONScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
Example: Example:
>>> json_scraper = JSONScraperGraph( >>> json_scraper = JSONScraperGraph(

View File

@ -33,7 +33,7 @@ class JSONScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph. source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output. schema (Optional[BaseModel]): The schema for the graph output.
Example: Example:
>>> search_graph = MultipleSearchGraph( >>> search_graph = MultipleSearchGraph(

View File

@ -29,7 +29,7 @@ class OmniScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -41,7 +41,7 @@ class OmniScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
Example: Example:
>>> omni_scraper = OmniScraperGraph( >>> omni_scraper = OmniScraperGraph(

View File

@ -34,7 +34,7 @@ class OmniSearchGraph(AbstractGraph):
Args: Args:
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output. schema (Optional[BaseModel]): The schema for the graph output.
Example: Example:
>>> omni_search_graph = OmniSearchGraph( >>> omni_search_graph = OmniSearchGraph(

View File

@ -26,7 +26,7 @@ class PDFScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -38,7 +38,7 @@ class PDFScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
Example: Example:
>>> pdf_scraper = PDFScraperGraph( >>> pdf_scraper = PDFScraperGraph(

View File

@ -34,7 +34,7 @@ class PdfScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph. source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output. schema (Optional[BaseModel]): The schema for the graph output.
Example: Example:
>>> search_graph = MultipleSearchGraph( >>> search_graph = MultipleSearchGraph(

View File

@ -23,7 +23,7 @@ class ScriptCreatorGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -36,7 +36,7 @@ class ScriptCreatorGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
Example: Example:
>>> script_creator = ScriptCreatorGraph( >>> script_creator = ScriptCreatorGraph(

View File

@ -5,6 +5,8 @@ ScriptCreatorMultiGraph Module
from copy import copy, deepcopy from copy import copy, deepcopy
from typing import List, Optional from typing import List, Optional
from pydantic import BaseModel
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph from .abstract_graph import AbstractGraph
from .script_creator_graph import ScriptCreatorGraph from .script_creator_graph import ScriptCreatorGraph
@ -30,7 +32,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph. source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output. schema (Optional[BaseModel]): The schema for the graph output.
Example: Example:
>>> script_graph = ScriptCreatorMultiGraph( >>> script_graph = ScriptCreatorMultiGraph(
... "What is Chioggia famous for?", ... "What is Chioggia famous for?",
@ -41,7 +43,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
>>> result = script_graph.run() >>> result = script_graph.run()
""" """
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None): def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
self.max_results = config.get("max_results", 3) self.max_results = config.get("max_results", 3)

View File

@ -33,7 +33,7 @@ class SearchGraph(AbstractGraph):
Args: Args:
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output. schema (Optional[BaseModel]): The schema for the graph output.
Example: Example:
>>> search_graph = SearchGraph( >>> search_graph = SearchGraph(

View File

@ -26,7 +26,7 @@ class SmartScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -37,7 +37,7 @@ class SmartScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
Example: Example:
>>> smart_scraper = SmartScraperGraph( >>> smart_scraper = SmartScraperGraph(

View File

@ -33,7 +33,7 @@ class SmartScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph. source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output. schema (Optional[BaseModel]): The schema for the graph output.
Example: Example:
>>> search_graph = MultipleSearchGraph( >>> search_graph = MultipleSearchGraph(

View File

@ -28,7 +28,7 @@ class SpeechGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings. embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution. verbose (bool): A flag indicating whether to show print statements during execution.
@ -39,7 +39,7 @@ class SpeechGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
Example: Example:
>>> speech_graph = SpeechGraph( >>> speech_graph = SpeechGraph(

View File

@ -24,7 +24,7 @@ class XMLScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -36,7 +36,7 @@ class XMLScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output. schema (BaseModel): The schema for the graph output.
Example: Example:
>>> xml_scraper = XMLScraperGraph( >>> xml_scraper = XMLScraperGraph(

View File

@ -34,7 +34,7 @@ class XMLScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph. source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output. schema (Optional[BaseModel]): The schema for the graph output.
Example: Example:
>>> search_graph = MultipleSearchGraph( >>> search_graph = MultipleSearchGraph(