Merge pull request #394 from inchoate/update-docs-for-schema

This commit is contained in:
Marco Vinciguerra 2024-06-18 20:53:41 +02:00 committed by GitHub
commit 61d08a5be8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 73 additions and 82 deletions

View File

@ -2,32 +2,31 @@
Basic example of a scraping pipeline using SmartScraper with a schema
"""
import os, json
import json
import os
from typing import Dict
from dotenv import load_dotenv
from pydantic import BaseModel
from scrapegraphai.graphs import SmartScraperGraph
load_dotenv()
# ************************************************
# Define the output schema for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
class Project(BaseModel):
title: str
description: str
class Projects(BaseModel):
Projects: Dict[str, Project]
# ************************************************
# Define the configuration for the graph
@ -37,7 +36,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
graph_config = {
"llm": {
"api_key":openai_key,
"api_key": openai_key,
"model": "gpt-3.5-turbo",
},
"verbose": True,
@ -51,8 +50,8 @@ graph_config = {
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description",
source="https://perinim.github.io/projects/",
schema=schema,
config=graph_config
schema=Projects,
config=graph_config,
)
result = smart_scraper_graph.run()

View File

@ -4,6 +4,9 @@ Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key
import os
from dotenv import load_dotenv
from typing import Dict
from pydantic import BaseModel
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
from langchain_community.llms import HuggingFaceEndpoint
@ -13,22 +16,12 @@ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
# Define the output schema for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
class Project(BaseModel):
title: str
description: str
class Projects(BaseModel):
Projects: Dict[str, Project]
# Required environment variable in .env:
# HUGGINGFACEHUB_API_TOKEN
@ -61,7 +54,7 @@ graph_config = {
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description",
source="https://perinim.github.io/projects/",
schema=schema,
schema=Projects,
config=graph_config
)
result = smart_scraper_graph.run()

View File

@ -2,8 +2,13 @@
Basic example of a scraping pipeline using SmartScraper with a schema
"""
import os, json
import json
import os
from typing import Dict, List
from dotenv import load_dotenv
from pydantic import BaseModel
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
@ -13,22 +18,12 @@ load_dotenv()
# Define the output schema for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
class Project(BaseModel):
title: str
description: str
class Projects(BaseModel):
Projects: Dict[str, Project]
# ************************************************
# Define the configuration for the graph
@ -60,7 +55,7 @@ smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description.",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
schema=schema,
schema=Projects,
config=graph_config
)

View File

@ -39,7 +39,7 @@ class AbstractGraph(ABC):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
configured for generating embeddings.

View File

@ -5,6 +5,8 @@ CSVScraperMultiGraph Module
from copy import copy, deepcopy
from typing import List, Optional
from pydantic import BaseModel
from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from .csv_scraper_graph import CSVScraperGraph
@ -32,7 +34,7 @@ class CSVScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
schema (Optional[BaseModel]): The schema for the graph output.
Example:
>>> search_graph = CSVScraperMultiGraph(
@ -42,7 +44,7 @@ class CSVScraperMultiGraph(AbstractGraph):
>>> result = search_graph.run()
"""
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
self.max_results = config.get("max_results", 3)

View File

@ -34,7 +34,7 @@ class DeepScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
configured for generating embeddings.
@ -45,7 +45,7 @@ class DeepScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
Example:
>>> deep_scraper = DeepScraperGraph(

View File

@ -23,7 +23,7 @@ class JSONScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
configured for generating embeddings.
@ -34,7 +34,7 @@ class JSONScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
Example:
>>> json_scraper = JSONScraperGraph(

View File

@ -33,7 +33,7 @@ class JSONScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
schema (Optional[BaseModel]): The schema for the graph output.
Example:
>>> search_graph = JSONScraperMultiGraph(

View File

@ -29,7 +29,7 @@ class OmniScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
configured for generating embeddings.
@ -41,7 +41,7 @@ class OmniScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
Example:
>>> omni_scraper = OmniScraperGraph(

View File

@ -34,7 +34,7 @@ class OmniSearchGraph(AbstractGraph):
Args:
prompt (str): The user prompt to search the internet.
config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
schema (Optional[BaseModel]): The schema for the graph output.
Example:
>>> omni_search_graph = OmniSearchGraph(

View File

@ -26,7 +26,7 @@ class PDFScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
configured for generating embeddings.
@ -38,7 +38,7 @@ class PDFScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
Example:
>>> pdf_scraper = PDFScraperGraph(

View File

@ -34,7 +34,7 @@ class PdfScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
schema (Optional[BaseModel]): The schema for the graph output.
Example:
>>> search_graph = PdfScraperMultiGraph(

View File

@ -23,7 +23,7 @@ class ScriptCreatorGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
configured for generating embeddings.
@ -36,7 +36,7 @@ class ScriptCreatorGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
Example:
>>> script_creator = ScriptCreatorGraph(

View File

@ -5,6 +5,8 @@ ScriptCreatorMultiGraph Module
from copy import copy, deepcopy
from typing import List, Optional
from pydantic import BaseModel
from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from .script_creator_graph import ScriptCreatorGraph
@ -30,7 +32,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
schema (Optional[BaseModel]): The schema for the graph output.
Example:
>>> script_graph = ScriptCreatorMultiGraph(
... "What is Chioggia famous for?",
@ -41,7 +43,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
>>> result = script_graph.run()
"""
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
self.max_results = config.get("max_results", 3)

View File

@ -33,7 +33,7 @@ class SearchGraph(AbstractGraph):
Args:
prompt (str): The user prompt to search the internet.
config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
schema (Optional[BaseModel]): The schema for the graph output.
Example:
>>> search_graph = SearchGraph(

View File

@ -26,7 +26,7 @@ class SmartScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
configured for generating embeddings.
@ -37,7 +37,7 @@ class SmartScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
Example:
>>> smart_scraper = SmartScraperGraph(

View File

@ -33,7 +33,7 @@ class SmartScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
schema (Optional[BaseModel]): The schema for the graph output.
Example:
>>> search_graph = SmartScraperMultiGraph(

View File

@ -28,7 +28,7 @@ class SpeechGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
@ -39,7 +39,7 @@ class SpeechGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
Example:
>>> speech_graph = SpeechGraph(

View File

@ -24,7 +24,7 @@ class XMLScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
configured for generating embeddings.
@ -36,7 +36,7 @@ class XMLScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
schema (BaseModel): The schema for the graph output.
Example:
>>> xml_scraper = XMLScraperGraph(

View File

@ -34,7 +34,7 @@ class XMLScraperMultiGraph(AbstractGraph):
prompt (str): The user prompt to search the internet.
source (List[str]): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
schema (Optional[BaseModel]): The schema for the graph output.
Example:
>>> search_graph = XMLScraperMultiGraph(