feat(smart-scraper-multi): add schema to graphs and created SmartScraperMultiGraph

2026-06-25 21:11:11 +08:00 · 2024-05-21 13:13:27 +02:00 · 2024-05-21 13:13:27 +02:00 · fc58e2d3a6
commit fc58e2d3a6
parent 5701afe927
35 changed files with 401 additions and 172 deletions
--- a/examples/knowledge_graph/kg_custom_graph.py
+++ b/examples/knowledge_graph/kg_custom_graph.py
@ -0,0 +1,134 @@
 """
 Example of custom graph for creating a knowledge graph
 """
 import os, json
 from dotenv import load_dotenv
 from langchain_openai import OpenAIEmbeddings
 from scrapegraphai.models import OpenAI
 from scrapegraphai.graphs import BaseGraph, SmartScraperGraph
 from scrapegraphai.nodes import GraphIteratorNode, MergeAnswersNode, KnowledgeGraphNode
 load_dotenv()
 # ************************************************
 # Define the output schema
 # ************************************************
 schema= """{ 
    "Job Postings": { 
        "Company x": [ 
            { 
                "title": "...", 
                "description": "...", 
                "location": "...", 
                "date_posted": "..", 
                "requirements": ["...", "...", "..."] 
            }, 
            { 
                "title": "...", 
                "description": "...", 
                "location": "...", 
                "date_posted": "..", 
                "requirements": ["...", "...", "..."] 
            } 
        ], 
        "Company y": [ 
            { 
                "title": "...", 
                "description": "...", 
                "location": "...", 
                "date_posted": "..", 
                "requirements": ["...", "...", "..."] 
            } 
        ] 
    } 
 }"""
 # ************************************************
 # Define the configuration for the graph
 # ************************************************
 openai_key = os.getenv("OPENAI_APIKEY")
 graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": False,
 }
 # ************************************************
 # Define the LLM, the embedder and the SmartScraperGraph instance
 # ************************************************
 llm_model = OpenAI(graph_config["llm"])
 embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
 smart_scraper_instance = SmartScraperGraph(
    prompt="",
    source="",
    config=graph_config,
 )
 # ************************************************
 # Define the graph nodes
 # ************************************************
 graph_iterator_node = GraphIteratorNode(
    input="user_prompt & urls",
    output=["results"],
    node_config={
        "graph_instance": smart_scraper_instance,
    }
 )
 merge_answers_node = MergeAnswersNode(
    input="user_prompt & results",
    output=["answer"],
    node_config={
        "llm_model": llm_model,
        "schema": schema
    }
 )
 knowledge_graph_node = KnowledgeGraphNode(
    input="user_prompt & answer",
    output=["kg"],
    node_config={
        "llm_model": llm_model,
    }
 )
 graph = BaseGraph(
    nodes=[
        graph_iterator_node,
        merge_answers_node,
        knowledge_graph_node
    ],
    edges=[
        (graph_iterator_node, merge_answers_node),
        (merge_answers_node, knowledge_graph_node)
    ],
    entry_point=graph_iterator_node
 )
 # ************************************************
 # Execute the graph
 # ************************************************
 result, execution_info = graph.execute({
    "user_prompt": "List me all the Machine Learning Engineer job postings",
    "urls": [
        "https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
        "https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
        "https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa"
        ],
 })
 # get the answer from the result
 result = result.get("answer", "No answer found.")
 print(json.dumps(result, indent=4))
--- a/examples/openai/custom_graph_openai.py
+++ b/examples/openai/custom_graph_openai.py
@ -46,7 +46,7 @@ robot_node = RobotsNode(
 fetch_node = FetchNode(
    input="url | local_dir",
-    output=["doc"],
+    output=["doc", "link_urls", "img_urls"],
    node_config={
        "verbose": True,
        "headless": True,
--- a/examples/openai/multiple_search_openai.py
+++ b/examples/openai/multiple_search_openai.py
@ -1,79 +0,0 @@
 """ 
 Basic example of scraping pipeline using SmartScraper
 """
 import os
 from dotenv import load_dotenv
 from scrapegraphai.graphs import MultipleSearchGraph
 from scrapegraphai.utils import prettify_exec_info
 load_dotenv()
 schema= """{ 
    "Job Postings": { 
        "Company x": [ 
            { 
                "title": "...", 
                "description": "...", 
                "location": "...", 
                "date_posted": "..", 
                "requirements": ["...", "...", "..."] 
            }, 
            { 
                "title": "...", 
                "description": "...", 
                "location": "...", 
                "date_posted": "..", 
                "requirements": ["...", "...", "..."] 
            } 
        ], 
        "Company y": [ 
            { 
                "title": "...", 
                "description": "...", 
                "location": "...", 
                "date_posted": "..", 
                "requirements": ["...", "...", "..."] 
            } 
        ] 
    } 
 }"""
 # ************************************************
 # Define the configuration for the graph
 # ************************************************
 openai_key = os.getenv("OPENAI_APIKEY")
 graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": False,
    "schema": schema,
 }
 multiple_search_graph = MultipleSearchGraph(
    prompt="List me all the projects with their description",
    source= [
        "https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
        "https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
        "https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa"
        ],
    config=graph_config,
 )
 result = multiple_search_graph.run()
 print(result)
 # ************************************************
 # Get graph execution info
 # ************************************************
 graph_exec_info = multiple_search_graph.get_execution_info()
 print(prettify_exec_info(graph_exec_info))
--- a/examples/openai/omni_scraper_openai.py
+++ b/examples/openai/omni_scraper_openai.py
@ -19,7 +19,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
 graph_config = {
    "llm": {
        "api_key": openai_key,
-        "model": "gpt-4-turbo",
+        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": True,
--- a/examples/openai/omni_search_graph_openai.py
+++ b/examples/openai/omni_search_graph_openai.py
@ -20,7 +20,7 @@ graph_config = {
        "model": "gpt-4o",
    },
    "max_results": 2,
-    "max_images": 5,
+    "max_images": 1,
    "verbose": True,
 }
--- a/examples/openai/smart_scraper_multi_openai.py
+++ b/examples/openai/smart_scraper_multi_openai.py
@ -0,0 +1,41 @@
 """ 
 Basic example of scraping pipeline using SmartScraper
 """
 import os, json
 from dotenv import load_dotenv
 from scrapegraphai.graphs import SmartScraperMultiGraph
 load_dotenv()
 # ************************************************
 # Define the configuration for the graph
 # ************************************************
 openai_key = os.getenv("OPENAI_APIKEY")
 graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": False,
 }
 # *******************************************************
 # Create the SmartScraperMultiGraph instance and run it
 # *******************************************************
 multiple_search_graph = SmartScraperMultiGraph(
    prompt="Who is Marco Perini?",
    source= [
        "https://perinim.github.io/",
        "https://perinim.github.io/cv/"
        ],
    schema=None,
    config=graph_config
 )
 result = multiple_search_graph.run()
 print(json.dumps(result, indent=4))
--- a/examples/openai/smart_scraper_schema_openai.py
+++ b/examples/openai/smart_scraper_schema_openai.py
@ -0,0 +1,59 @@
 """ 
 Basic example of scraping pipeline using SmartScraper
 """
 import os, json
 from dotenv import load_dotenv
 from scrapegraphai.graphs import SmartScraperGraph
 load_dotenv()
 # ************************************************
 # Define the output schema for the graph
 # ************************************************
 schema= """
    { 
    "Projects": [
        "Project #": 
            { 
                "title": "...", 
                "description": "...", 
            }, 
        "Project #": 
            { 
                "title": "...", 
                "description": "...", 
            } 
        ] 
    } 
 """
 # ************************************************
 # Define the configuration for the graph
 # ************************************************
 openai_key = os.getenv("OPENAI_APIKEY")
 graph_config = {
    "llm": {
        "api_key":openai_key,
        "model": "gpt-3.5-turbo",
    },
    "verbose": True,
    "headless": False,
 }
 # ************************************************
 # Create the SmartScraperGraph instance and run it
 # ************************************************
 smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=schema,
    config=graph_config
 )
 result = smart_scraper_graph.run()
 print(json.dumps(result, indent=4))
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@ -45,6 +45,10 @@ certifi==2024.2.2
    # via requests
 charset-normalizer==3.3.2
    # via requests
 colorama==0.4.6
    # via ipython
    # via pytest
    # via tqdm
 dataclasses-json==0.6.6
    # via langchain
    # via langchain-community
@ -100,6 +104,7 @@ graphviz==0.20.3
    # via scrapegraphai
 greenlet==3.0.3
    # via playwright
    # via sqlalchemy
 groq==0.5.0
    # via langchain-groq
 grpcio==1.63.0
@ -212,8 +217,6 @@ pandas==2.2.2
    # via scrapegraphai
 parso==0.8.4
    # via jedi
 pexpect==4.9.0
    # via ipython
 playwright==1.43.0
    # via scrapegraphai
 pluggy==1.5.0
@ -230,8 +233,6 @@ protobuf==4.25.3
    # via googleapis-common-protos
    # via grpcio-status
    # via proto-plus
 ptyprocess==0.7.0
    # via pexpect
 pure-eval==0.2.2
    # via stack-data
 pyasn1==0.6.0
--- a/requirements.lock
+++ b/requirements.lock
@ -45,6 +45,9 @@ certifi==2024.2.2
    # via requests
 charset-normalizer==3.3.2
    # via requests
 colorama==0.4.6
    # via ipython
    # via tqdm
 dataclasses-json==0.6.6
    # via langchain
    # via langchain-community
@ -99,6 +102,7 @@ graphviz==0.20.3
    # via scrapegraphai
 greenlet==3.0.3
    # via playwright
    # via sqlalchemy
 groq==0.5.0
    # via langchain-groq
 grpcio==1.63.0
@ -208,8 +212,6 @@ pandas==2.2.2
    # via scrapegraphai
 parso==0.8.4
    # via jedi
 pexpect==4.9.0
    # via ipython
 playwright==1.43.0
    # via scrapegraphai
 prompt-toolkit==3.0.43
@ -224,8 +226,6 @@ protobuf==4.25.3
    # via googleapis-common-protos
    # via grpcio-status
    # via proto-plus
 ptyprocess==0.7.0
    # via pexpect
 pure-eval==0.2.2
    # via stack-data
 pyasn1==0.6.0
--- a/scrapegraphai/graphs/__init__.py
+++ b/scrapegraphai/graphs/__init__.py
@ -15,4 +15,4 @@ from .csv_scraper_graph import CSVScraperGraph
 from .pdf_scraper_graph import PDFScraperGraph
 from .omni_scraper_graph import OmniScraperGraph
 from .omni_search_graph import OmniSearchGraph
-from .multiple_search_graph import MultipleSearchGraph
+from .smart_scraper_multi_graph import SmartScraperMultiGraph
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@ -7,10 +7,11 @@ from langchain_aws import BedrockEmbeddings
 from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
 from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
 from langchain_google_genai import GoogleGenerativeAIEmbeddings
 from ..helpers import models_tokens
 from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic
 from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
 from ..helpers import models_tokens
 from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic, DeepSeek
 class AbstractGraph(ABC):
    """
@ -19,6 +20,7 @@ class AbstractGraph(ABC):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client,
                        configured for generating embeddings.
@ -29,6 +31,7 @@ class AbstractGraph(ABC):
        prompt (str): The prompt for the graph.
        config (dict): Configuration parameters for the graph.
        source (str, optional): The source of the graph.
        schema (str, optional): The schema for the graph output.
    Example:
        >>> class MyGraph(AbstractGraph):
@ -40,11 +43,12 @@ class AbstractGraph(ABC):
        >>> result = my_graph.run()
    """
-    def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
+    def __init__(self, prompt: str, config: dict, source: Optional[str] = None, schema: Optional[str] = None):
        self.prompt = prompt
        self.source = source
        self.config = config
        self.schema = schema
        self.llm_model = self._create_llm(config["llm"], chat=True)
        self.embedder_model = self._create_default_embedder(llm_config=config["llm"]
                                                            ) if "embeddings" not in config else self._create_embedder(
@ -61,14 +65,14 @@ class AbstractGraph(ABC):
        self.headless = True if config is None else config.get(
            "headless", True)
        self.loader_kwargs = config.get("loader_kwargs", {})
        self.schema = config.get("schema", None)
-        common_params = {"headless": self.headless,
+        common_params = {
-                         "verbose": self.verbose,
+            "headless": self.headless,
-                         "loader_kwargs": self.loader_kwargs,
+            "verbose": self.verbose,
-                         "llm_model": self.llm_model,
+            "loader_kwargs": self.loader_kwargs,
-                         "embedder_model": self.embedder_model,
+            "llm_model": self.llm_model,
-                         "schema": self.schema}
+            "embedder_model": self.embedder_model
            }
        self.set_common_params(common_params, overwrite=False)
--- a/scrapegraphai/graphs/csv_scraper_graph.py
+++ b/scrapegraphai/graphs/csv_scraper_graph.py
@ -1,14 +1,18 @@
 """
 Module for creating the CSV scraper graph
 """
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerCSVNode
 )
 from .abstract_graph import AbstractGraph
 class CSVScraperGraph(AbstractGraph):
@ -17,11 +21,11 @@ class CSVScraperGraph(AbstractGraph):
    information from web pages using a natural language model to interpret and answer prompts.
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
        """
        Initializes the CSVScraperGraph with a prompt, source, and configuration.
        """
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
        self.input_key = "csv" if source.endswith("csv") else "csv_dir"
@ -53,6 +57,7 @@ class CSVScraperGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
                "schema": self.schema,
            }
        )
--- a/scrapegraphai/graphs/deep_scraper_graph.py
+++ b/scrapegraphai/graphs/deep_scraper_graph.py
@ -2,7 +2,11 @@
 DeepScraperGraph Module
 """
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    SearchLinkNode,
@ -12,7 +16,6 @@ from ..nodes import (
    GraphIteratorNode,
    MergeAnswersNode
 )
 from .abstract_graph import AbstractGraph
 class DeepScraperGraph(AbstractGraph):
@ -30,15 +33,19 @@ class DeepScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
        verbose (bool): A flag indicating whether to show print statements during execution.
        headless (bool): A flag indicating whether to run the graph in headless mode.
    Args:
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
    Example:
        >>> deep_scraper = DeepScraperGraph(
        ...     "List me all the job titles and detailed job description.",
@ -49,8 +56,10 @@ class DeepScraperGraph(AbstractGraph):
        )
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
-        super().__init__(prompt, config, source)
+    
        super().__init__(prompt, config, source, schema)
        self.input_key = "url" if source.startswith("http") else "local_dir"
    def _create_repeated_graph(self) -> BaseGraph:
@ -84,7 +93,8 @@ class DeepScraperGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
                "schema": self.schema
            }
        )
        search_node = SearchLinkNode(
@ -108,6 +118,7 @@ class DeepScraperGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
                "schema": self.schema
            }
        )
--- a/scrapegraphai/graphs/json_scraper_graph.py
+++ b/scrapegraphai/graphs/json_scraper_graph.py
@ -2,14 +2,17 @@
 JSONScraperGraph Module
 """
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode
 )
 from .abstract_graph import AbstractGraph
 class JSONScraperGraph(AbstractGraph):
@ -20,6 +23,7 @@ class JSONScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -30,6 +34,7 @@ class JSONScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
    Example:
        >>> json_scraper = JSONScraperGraph(
@ -40,8 +45,8 @@ class JSONScraperGraph(AbstractGraph):
        >>> result = json_scraper.run()
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
        self.input_key = "json" if source.endswith("json") else "json_dir"
@ -76,7 +81,8 @@ class JSONScraperGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
                "schema": self.schema
            }
        )
--- a/scrapegraphai/graphs/omni_scraper_graph.py
+++ b/scrapegraphai/graphs/omni_scraper_graph.py
@ -2,7 +2,11 @@
 OmniScraperGraph Module
 """
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    ParseNode,
@ -10,8 +14,8 @@ from ..nodes import (
    RAGNode,
    GenerateAnswerOmniNode
 )
-from scrapegraphai.models import OpenAIImageToText
+
-from .abstract_graph import AbstractGraph
+from ..models import OpenAIImageToText
 class OmniScraperGraph(AbstractGraph):
@ -24,6 +28,7 @@ class OmniScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -35,6 +40,7 @@ class OmniScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
    Example:
        >>> omni_scraper = OmniScraperGraph(
@ -46,11 +52,11 @@ class OmniScraperGraph(AbstractGraph):
        )
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
        self.max_images = 5 if config is None else config.get("max_images", 5)
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
        self.input_key = "url" if source.startswith("http") else "local_dir"
@ -96,7 +102,8 @@ class OmniScraperGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
                "schema": self.schema
            }
        )
--- a/scrapegraphai/graphs/omni_search_graph.py
+++ b/scrapegraphai/graphs/omni_search_graph.py
@ -3,15 +3,17 @@ OmniSearchGraph Module
 """
 from copy import copy, deepcopy
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .omni_scraper_graph import OmniScraperGraph
 from ..nodes import (
    SearchInternetNode,
    GraphIteratorNode,
    MergeAnswersNode
 )
 from .abstract_graph import AbstractGraph
 from .omni_scraper_graph import OmniScraperGraph
 class OmniSearchGraph(AbstractGraph):
@ -31,6 +33,7 @@ class OmniSearchGraph(AbstractGraph):
    Args:
        prompt (str): The user prompt to search the internet.
        config (dict): Configuration parameters for the graph.
        schema (Optional[str]): The schema for the graph output.
    Example:
        >>> omni_search_graph = OmniSearchGraph(
@ -40,7 +43,7 @@ class OmniSearchGraph(AbstractGraph):
        >>> result = omni_search_graph.run()
    """
-    def __init__(self, prompt: str, config: dict):
+    def __init__(self, prompt: str, config: dict, schema: Optional[str] = None):
        self.max_results = config.get("max_results", 3)
@ -49,7 +52,7 @@ class OmniSearchGraph(AbstractGraph):
        else:
            self.copy_config = deepcopy(config)
-        super().__init__(prompt, config)
+        super().__init__(prompt, config, schema=schema)  # keyword: the 3rd positional parameter of AbstractGraph.__init__ is `source`
    def _create_graph(self) -> BaseGraph:
        """
@ -94,6 +97,7 @@ class OmniSearchGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
                "schema": self.schema
            }
        )
--- a/scrapegraphai/graphs/pdf_scraper_graph.py
+++ b/scrapegraphai/graphs/pdf_scraper_graph.py
@ -2,14 +2,17 @@
 PDFScraperGraph Module
 """
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode
 )
 from .abstract_graph import AbstractGraph
 class PDFScraperGraph(AbstractGraph):
@ -21,6 +24,7 @@ class PDFScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -32,6 +36,7 @@ class PDFScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
    Example:
        >>> pdf_scraper = PDFScraperGraph(
@ -42,8 +47,8 @@ class PDFScraperGraph(AbstractGraph):
        >>> result = pdf_scraper.run()
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
        self.input_key = "pdf" if source.endswith("pdf") else "pdf_dir"
@ -79,6 +84,7 @@ class PDFScraperGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
                "schema": self.schema,
            }
        )
--- a/scrapegraphai/graphs/script_creator_graph.py
+++ b/scrapegraphai/graphs/script_creator_graph.py
@ -2,13 +2,16 @@
 ScriptCreatorGraph Module
 """
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    ParseNode,
    GenerateScraperNode
 )
 from .abstract_graph import AbstractGraph
 class ScriptCreatorGraph(AbstractGraph):
@ -19,6 +22,7 @@ class ScriptCreatorGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -31,6 +35,7 @@ class ScriptCreatorGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
    Example:
        >>> script_creator = ScriptCreatorGraph(
@ -41,11 +46,11 @@ class ScriptCreatorGraph(AbstractGraph):
        >>> result = script_creator.run()
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
        self.library = config['library']
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
        self.input_key = "url" if source.startswith("http") else "local_dir"
@ -65,14 +70,16 @@ class ScriptCreatorGraph(AbstractGraph):
            input="doc",
            output=["parsed_doc"],
            node_config={"chunk_size": self.model_token,
                         "verbose": self.verbose,
                         "parse_html": False
                         }
        )
        generate_scraper_node = GenerateScraperNode(
            input="user_prompt & (doc)",
            output=["answer"],
-            node_config={"llm_model": self.llm_model},
+            node_config={
                "llm_model": self.llm_model,
                "schema": self.schema,
            },
            library=self.library,
            website=self.source
        )
--- a/scrapegraphai/graphs/search_graph.py
+++ b/scrapegraphai/graphs/search_graph.py
@ -3,15 +3,17 @@ SearchGraph Module
 """
 from copy import copy, deepcopy
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .smart_scraper_graph import SmartScraperGraph
 from ..nodes import (
    SearchInternetNode,
    GraphIteratorNode,
    MergeAnswersNode
 )
 from .abstract_graph import AbstractGraph
 from .smart_scraper_graph import SmartScraperGraph
 class SearchGraph(AbstractGraph):
@ -30,6 +32,7 @@ class SearchGraph(AbstractGraph):
    Args:
        prompt (str): The user prompt to search the internet.
        config (dict): Configuration parameters for the graph.
        schema (Optional[str]): The schema for the graph output.
    Example:
        >>> search_graph = SearchGraph(
@ -39,7 +42,7 @@ class SearchGraph(AbstractGraph):
        >>> result = search_graph.run()
    """
-    def __init__(self, prompt: str, config: dict):
+    def __init__(self, prompt: str, config: dict, schema: Optional[str] = None):
        self.max_results = config.get("max_results", 3)
@ -48,7 +51,7 @@ class SearchGraph(AbstractGraph):
        else:
            self.copy_config = deepcopy(config)
-        super().__init__(prompt, config)
+        super().__init__(prompt, config, schema=schema)  # keyword: the 3rd positional parameter of AbstractGraph.__init__ is `source`
    def _create_graph(self) -> BaseGraph:
        """
@ -93,6 +96,7 @@ class SearchGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
                "schema": self.schema
            }
        )
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@ -2,14 +2,17 @@
 SmartScraperGraph Module
 """
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode
 )
 from .abstract_graph import AbstractGraph
 class SmartScraperGraph(AbstractGraph):
@ -22,6 +25,7 @@ class SmartScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -32,6 +36,7 @@ class SmartScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
    Example:
        >>> smart_scraper = SmartScraperGraph(
@ -43,8 +48,8 @@ class SmartScraperGraph(AbstractGraph):
        )
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
        self.input_key = "url" if source.startswith("http") else "local_dir"
@ -82,7 +87,7 @@ class SmartScraperGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
-                "schema": self.config.get("schema", None),
+                "schema": self.schema,
            }
        )
--- a/scrapegraphai/graphs/smart_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_multi_graph.py
@ -1,25 +1,25 @@
 """ 
-MultipleSearchGraph Module
+SmartScraperMultiGraph Module
 """
 from copy import copy, deepcopy
 from typing import List, Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .smart_scraper_graph import SmartScraperGraph
 from ..nodes import (
    GraphIteratorNode,
    MergeAnswersNode,
    KnowledgeGraphNode
 )
 from .abstract_graph import AbstractGraph
 from .smart_scraper_graph import SmartScraperGraph
 from typing import List, Optional
-class MultipleSearchGraph(AbstractGraph):
+class SmartScraperMultiGraph(AbstractGraph):
    """ 
-    MultipleSearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.
+    SmartScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
-    It only requires a user prompt to search the internet and generate an answer.
+    It only requires a user prompt and a list of URLs.
    Attributes:
        prompt (str): The user prompt to answer from the scraped URLs.
@ -31,7 +31,9 @@ class MultipleSearchGraph(AbstractGraph):
    Args:
        prompt (str): The user prompt to answer from the scraped URLs.
        source (List[str]): The list of URLs to scrape.
        config (dict): Configuration parameters for the graph.
        schema (Optional[str]): The schema for the graph output.
    Example:
        >>> search_graph = SmartScraperMultiGraph(
@ -41,7 +43,7 @@ class MultipleSearchGraph(AbstractGraph):
        >>> result = search_graph.run()
    """
-    def __init__(self, prompt: str, source: List[str], config: dict):
+    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
        self.max_results = config.get("max_results", 3)
@ -50,7 +52,7 @@ class MultipleSearchGraph(AbstractGraph):
        else:
            self.copy_config = deepcopy(config)
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
    def _create_graph(self) -> BaseGraph:
        """
@ -87,15 +89,7 @@ class MultipleSearchGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
-                "schema": self.config.get("schema", None),
+                "schema": self.schema
            }
        )
        knowledge_graph_node = KnowledgeGraphNode(
            input="user_prompt & answer",
            output=["kg"],
            node_config={
                "llm_model": self.llm_model,
            }
        )
@ -103,11 +97,9 @@ class MultipleSearchGraph(AbstractGraph):
            nodes=[
                graph_iterator_node,
                merge_answers_node,
                knowledge_graph_node
            ],
            edges=[
                (graph_iterator_node, merge_answers_node),
                (merge_answers_node, knowledge_graph_node)
            ],
            entry_point=graph_iterator_node
        )
--- a/scrapegraphai/graphs/speech_graph.py
+++ b/scrapegraphai/graphs/speech_graph.py
@ -2,9 +2,11 @@
 SpeechGraph Module
 """
-from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
+from typing import Optional
-from ..models import OpenAITextToSpeech
+
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    ParseNode,
@ -12,7 +14,9 @@ from ..nodes import (
    GenerateAnswerNode,
    TextToSpeechNode,
 )
-from .abstract_graph import AbstractGraph
+
 from ..utils.save_audio_from_bytes import save_audio_from_bytes
 from ..models import OpenAITextToSpeech
 class SpeechGraph(AbstractGraph):
@ -23,6 +27,7 @@ class SpeechGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, configured for generating embeddings.
        verbose (bool): A flag indicating whether to show print statements during execution.
@ -33,6 +38,7 @@ class SpeechGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
    Example:
        >>> speech_graph = SpeechGraph(
@ -41,8 +47,8 @@ class SpeechGraph(AbstractGraph):
        ...     {"llm": {"model": "gpt-3.5-turbo"}}
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
        self.input_key = "url" if source.startswith("http") else "local_dir"
@ -76,7 +82,8 @@ class SpeechGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
                "schema": self.schema
            }
        )
        text_to_speech_node = TextToSpeechNode(
--- a/scrapegraphai/graphs/xml_scraper_graph.py
+++ b/scrapegraphai/graphs/xml_scraper_graph.py
@ -2,14 +2,17 @@
 XMLScraperGraph Module
 """
 from typing import Optional
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode
 )
 from .abstract_graph import AbstractGraph
 class XMLScraperGraph(AbstractGraph):
@ -21,6 +24,7 @@ class XMLScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -32,6 +36,7 @@ class XMLScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
        schema (str): The schema for the graph output.
    Example:
        >>> xml_scraper = XMLScraperGraph(
@ -42,8 +47,8 @@ class XMLScraperGraph(AbstractGraph):
        >>> result = xml_scraper.run()
    """
-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)
        self.input_key = "xml" if source.endswith("xml") else "xml_dir"
@ -78,7 +83,8 @@ class XMLScraperGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
                "schema": self.schema
            }
        )
--- a/scrapegraphai/nodes/conditional_node.py
+++ b/scrapegraphai/nodes/conditional_node.py
@ -1,6 +1,7 @@
 """ 
 Module for implementing the conditional node
 """
 from .base_node import BaseNode
--- a/scrapegraphai/nodes/generate_answer_csv_node.py
+++ b/scrapegraphai/nodes/generate_answer_csv_node.py
@ -10,10 +10,9 @@ from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.runnables import RunnableParallel
 from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv
 # Imports from the library
 from .base_node import BaseNode
 from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv
 class GenerateAnswerCSVNode(BaseNode):
--- a/scrapegraphai/nodes/generate_answer_node.py
+++ b/scrapegraphai/nodes/generate_answer_node.py
@ -15,6 +15,7 @@ from langchain_core.runnables import RunnableParallel
 from .base_node import BaseNode
 from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_with_schema, template_no_chunks_with_schema
 class GenerateAnswerNode(BaseNode):
    """
    A node that generates an answer using a large language model (LLM) based on the user's input
--- a/scrapegraphai/nodes/generate_answer_omni_node.py
+++ b/scrapegraphai/nodes/generate_answer_omni_node.py
@ -15,6 +15,7 @@ from langchain_core.runnables import RunnableParallel
 from .base_node import BaseNode
 from ..helpers.generate_answer_node_omni_prompts import template_no_chunk_omni, template_chunks_omni, template_merge_omni
 class GenerateAnswerOmniNode(BaseNode):
    """
    A node that generates an answer using a large language model (LLM) based on the user's input
--- a/scrapegraphai/nodes/generate_answer_pdf_node.py
+++ b/scrapegraphai/nodes/generate_answer_pdf_node.py
@ -14,6 +14,7 @@ from langchain_core.runnables import RunnableParallel
 from .base_node import BaseNode
 from ..helpers.generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf
 class GenerateAnswerPDFNode(BaseNode):
    """
    A node that generates an answer using a language model (LLM) based on the user's input
--- a/scrapegraphai/nodes/graph_iterator_node.py
+++ b/scrapegraphai/nodes/graph_iterator_node.py
@ -10,7 +10,6 @@ from tqdm.asyncio import tqdm
 from .base_node import BaseNode
 _default_batchsize = 16
--- a/scrapegraphai/nodes/knowledge_graph_node.py
+++ b/scrapegraphai/nodes/knowledge_graph_node.py
@ -14,6 +14,7 @@ from langchain_core.output_parsers import JsonOutputParser
 from .base_node import BaseNode
 from ..utils import create_graph, create_interactive_graph
 class KnowledgeGraphNode(BaseNode):
    """
    A node responsible for generating a knowledge graph from a dictionary.
--- a/scrapegraphai/nodes/parse_node.py
+++ b/scrapegraphai/nodes/parse_node.py
@ -3,8 +3,10 @@ ParseNode Module
 """
 from typing import List, Optional
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_transformers import Html2TextTransformer
 from .base_node import BaseNode
--- a/scrapegraphai/nodes/rag_node.py
+++ b/scrapegraphai/nodes/rag_node.py
@ -3,6 +3,7 @@ RAGNode Module
 """
 from typing import List, Optional
 from langchain.docstore.document import Document
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
--- a/scrapegraphai/nodes/robots_node.py
+++ b/scrapegraphai/nodes/robots_node.py
@ -4,9 +4,11 @@ RobotsNode Module
 from typing import List, Optional
 from urllib.parse import urlparse
 from langchain_community.document_loaders import AsyncChromiumLoader
 from langchain.prompts import PromptTemplate
 from langchain.output_parsers import CommaSeparatedListOutputParser
 from .base_node import BaseNode
 from ..helpers import robots_dictionary
--- a/scrapegraphai/nodes/search_internet_node.py
+++ b/scrapegraphai/nodes/search_internet_node.py
@ -3,8 +3,10 @@ SearchInternetNode Module
 """
 from typing import List, Optional
 from langchain.output_parsers import CommaSeparatedListOutputParser
 from langchain.prompts import PromptTemplate
 from ..utils.research_web import search_on_web
 from .base_node import BaseNode
--- a/scrapegraphai/nodes/search_link_node.py
+++ b/scrapegraphai/nodes/search_link_node.py
@ -6,7 +6,6 @@ SearchLinkNode Module
 from typing import List, Optional
 from tqdm import tqdm
 # Imports from Langchain
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
`@ -10,7 +10,6 @@ from tqdm.asyncio import tqdm`

	`from .base_node import BaseNode`	`from .base_node import BaseNode`


	`_default_batchsize = 16`	`_default_batchsize = 16`