feat(smart-scraper-multi): add schema to graphs and created SmartScraperMultiGraph

2026-06-25 21:11:11 +08:00 · 2024-05-21 13:13:27 +02:00 · 2024-05-21 13:13:27 +02:00 · fc58e2d3a6
commit fc58e2d3a6
parent 5701afe927
35 changed files with 401 additions and 172 deletions
--- a/examples/knowledge_graph/kg_custom_graph.py
+++ b/examples/knowledge_graph/kg_custom_graph.py
@ -0,0 +1,134 @@
+"""
+Example of custom graph for creating a knowledge graph
+"""
+
+import os, json
+from dotenv import load_dotenv
+
+from langchain_openai import OpenAIEmbeddings
+from scrapegraphai.models import OpenAI
+from scrapegraphai.graphs import BaseGraph, SmartScraperGraph
+from scrapegraphai.nodes import GraphIteratorNode, MergeAnswersNode, KnowledgeGraphNode
+
+load_dotenv()
+
+# ************************************************
+# Define the output schema
+# ************************************************
+
+schema= """{ 
+    "Job Postings": { 
+        "Company x": [ 
+            { 
+                "title": "...", 
+                "description": "...", 
+                "location": "...", 
+                "date_posted": "..", 
+                "requirements": ["...", "...", "..."] 
+            }, 
+            { 
+                "title": "...", 
+                "description": "...", 
+                "location": "...", 
+                "date_posted": "..", 
+                "requirements": ["...", "...", "..."] 
+            } 
+        ], 
+        "Company y": [ 
+            { 
+                "title": "...", 
+                "description": "...", 
+                "location": "...", 
+                "date_posted": "..", 
+                "requirements": ["...", "...", "..."] 
+            } 
+        ] 
+    } 
+}"""
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+openai_key = os.getenv("OPENAI_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": openai_key,
+        "model": "gpt-4o",
+    },
+    "verbose": True,
+    "headless": False,
+}
+
+# ************************************************
+# Create the LLM, the embedder and the scraper graph instance
+# ************************************************
+
+llm_model = OpenAI(graph_config["llm"])
+embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)  # NOTE(review): not referenced again in this script
+
+smart_scraper_instance = SmartScraperGraph(
+    prompt="",  # left empty here; presumably filled in per URL by GraphIteratorNode - TODO confirm
+    source="",  # left empty here as well; the URLs are supplied at execute() time
+    config=graph_config,
+)
+
+# ************************************************
+# Define the graph nodes
+# ************************************************
+
+graph_iterator_node = GraphIteratorNode(
+    input="user_prompt & urls",
+    output=["results"],
+    node_config={
+        "graph_instance": smart_scraper_instance,
+    }
+)
+
+merge_answers_node = MergeAnswersNode(
+    input="user_prompt & results",
+    output=["answer"],
+    node_config={
+        "llm_model": llm_model,
+        "schema": schema
+    }
+)
+
+knowledge_graph_node = KnowledgeGraphNode(
+    input="user_prompt & answer",
+    output=["kg"],
+    node_config={
+        "llm_model": llm_model,
+    }
+)
+
+graph = BaseGraph(
+    nodes=[
+        graph_iterator_node,
+        merge_answers_node,
+        knowledge_graph_node
+    ],
+    edges=[
+        (graph_iterator_node, merge_answers_node),
+        (merge_answers_node, knowledge_graph_node)
+    ],
+    entry_point=graph_iterator_node
+)
+
+# ************************************************
+# Execute the graph
+# ************************************************
+
+result, execution_info = graph.execute({
+    "user_prompt": "List me all the Machine Learning Engineer job postings",
+    "urls": [
+        "https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
+        "https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
+        "https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa"
+        ],
+})
+
+# get the answer from the result
+result = result.get("answer", "No answer found.")
+print(json.dumps(result, indent=4))
--- a/examples/openai/custom_graph_openai.py
+++ b/examples/openai/custom_graph_openai.py
@ -46,7 +46,7 @@ robot_node = RobotsNode(

 fetch_node = FetchNode(
    input="url | local_dir",
-    output=["doc"],
+    output=["doc", "link_urls", "img_urls"],
    node_config={
        "verbose": True,
        "headless": True,
--- a/examples/openai/multiple_search_openai.py
+++ b/examples/openai/multiple_search_openai.py
@ -1,79 +0,0 @@
-""" 
-Basic example of scraping pipeline using SmartScraper
-"""
-
-import os
-from dotenv import load_dotenv
-from scrapegraphai.graphs import MultipleSearchGraph
-from scrapegraphai.utils import prettify_exec_info
-
-load_dotenv()
-
-
-schema= """{ 
-    "Job Postings": { 
-        "Company x": [ 
-            { 
-                "title": "...", 
-                "description": "...", 
-                "location": "...", 
-                "date_posted": "..", 
-                "requirements": ["...", "...", "..."] 
-            }, 
-            { 
-                "title": "...", 
-                "description": "...", 
-                "location": "...", 
-                "date_posted": "..", 
-                "requirements": ["...", "...", "..."] 
-            } 
-        ], 
-        "Company y": [ 
-            { 
-                "title": "...", 
-                "description": "...", 
-                "location": "...", 
-                "date_posted": "..", 
-                "requirements": ["...", "...", "..."] 
-            } 
-        ] 
-    } 
-}"""
-
-# ************************************************
-# Define the configuration for the graph
-# ************************************************
-
-openai_key = os.getenv("OPENAI_APIKEY")
-
-graph_config = {
-    "llm": {
-        "api_key": openai_key,
-        "model": "gpt-4o",
-    },
-    "verbose": True,
-    "headless": False,
-    "schema": schema,
-}
-
-
-
-multiple_search_graph = MultipleSearchGraph(
-    prompt="List me all the projects with their description",
-    source= [
-        "https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
-        "https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
-        "https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa"
-        ],
-    config=graph_config,
-)
-
-result = multiple_search_graph.run()
-print(result)
-
-# ************************************************
-# Get graph execution info
-# ************************************************
-
-graph_exec_info = multiple_search_graph.get_execution_info()
-print(prettify_exec_info(graph_exec_info))
--- a/examples/openai/omni_scraper_openai.py
+++ b/examples/openai/omni_scraper_openai.py
@ -19,7 +19,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
 graph_config = {
    "llm": {
        "api_key": openai_key,
-        "model": "gpt-4-turbo",
+        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": True,
--- a/examples/openai/omni_search_graph_openai.py
+++ b/examples/openai/omni_search_graph_openai.py
@ -20,7 +20,7 @@ graph_config = {
        "model": "gpt-4o",
    },
    "max_results": 2,
-    "max_images": 5,
+    "max_images": 1,
    "verbose": True,
 }

--- a/examples/openai/smart_scraper_multi_openai.py
+++ b/examples/openai/smart_scraper_multi_openai.py
@ -0,0 +1,41 @@
+""" 
+Basic example of a scraping pipeline over multiple URLs using SmartScraperMultiGraph
+"""
+
+import os, json
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperMultiGraph
+
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+openai_key = os.getenv("OPENAI_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": openai_key,
+        "model": "gpt-4o",
+    },
+    "verbose": True,
+    "headless": False,
+}
+
+# *******************************************************
+# Create the SmartScraperMultiGraph instance and run it
+# *******************************************************
+
+smart_scraper_multi_graph = SmartScraperMultiGraph(
+    prompt="Who is Marco Perini?",
+    source=[
+        "https://perinim.github.io/",
+        "https://perinim.github.io/cv/",
+    ],
+    schema=None,  # no output schema is requested in this example
+    config=graph_config,
+)
+
+result = smart_scraper_multi_graph.run()
+print(json.dumps(result, indent=4))
--- a/examples/openai/smart_scraper_schema_openai.py
+++ b/examples/openai/smart_scraper_schema_openai.py
@ -0,0 +1,59 @@
+""" 
+Basic example of a scraping pipeline using SmartScraperGraph with an output schema
+"""
+
+import os, json
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+
+load_dotenv()
+
+# ************************************************
+# Define the output schema for the graph (a template string, not strict JSON)
+# ************************************************
+
+schema= """
+    { 
+    "Projects": [
+        "Project #": 
+            { 
+                "title": "...", 
+                "description": "...", 
+            }, 
+        "Project #": 
+            { 
+                "title": "...", 
+                "description": "...", 
+            } 
+        ] 
+    } 
+"""
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+openai_key = os.getenv("OPENAI_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key":openai_key,
+        "model": "gpt-3.5-turbo",
+    },
+    "verbose": True,
+    "headless": False,
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SmartScraperGraph(
+    prompt="List me all the projects with their description",
+    source="https://perinim.github.io/projects/",
+    schema=schema,
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(json.dumps(result, indent=4))
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@ -45,6 +45,10 @@ certifi==2024.2.2
    # via requests
 charset-normalizer==3.3.2
    # via requests
+colorama==0.4.6
+    # via ipython
+    # via pytest
+    # via tqdm
 dataclasses-json==0.6.6
    # via langchain
    # via langchain-community
@ -100,6 +104,7 @@ graphviz==0.20.3
    # via scrapegraphai
 greenlet==3.0.3
    # via playwright
+    # via sqlalchemy
 groq==0.5.0
    # via langchain-groq
 grpcio==1.63.0
@ -212,8 +217,6 @@ pandas==2.2.2
    # via scrapegraphai
 parso==0.8.4
    # via jedi
-pexpect==4.9.0
-    # via ipython
 playwright==1.43.0
    # via scrapegraphai
 pluggy==1.5.0
@ -230,8 +233,6 @@ protobuf==4.25.3
    # via googleapis-common-protos
    # via grpcio-status
    # via proto-plus
-ptyprocess==0.7.0
-    # via pexpect
 pure-eval==0.2.2
    # via stack-data
 pyasn1==0.6.0
--- a/requirements.lock
+++ b/requirements.lock
@ -45,6 +45,9 @@ certifi==2024.2.2
    # via requests
 charset-normalizer==3.3.2
    # via requests
+colorama==0.4.6
+    # via ipython
+    # via tqdm
 dataclasses-json==0.6.6
    # via langchain
    # via langchain-community
@ -99,6 +102,7 @@ graphviz==0.20.3
    # via scrapegraphai
 greenlet==3.0.3
    # via playwright
+    # via sqlalchemy
 groq==0.5.0
    # via langchain-groq
 grpcio==1.63.0
@ -208,8 +212,6 @@ pandas==2.2.2
    # via scrapegraphai
 parso==0.8.4
    # via jedi
-pexpect==4.9.0
-    # via ipython
 playwright==1.43.0
    # via scrapegraphai
 prompt-toolkit==3.0.43
@ -224,8 +226,6 @@ protobuf==4.25.3
    # via googleapis-common-protos
    # via grpcio-status
    # via proto-plus
-ptyprocess==0.7.0
-    # via pexpect
 pure-eval==0.2.2
    # via stack-data
 pyasn1==0.6.0
--- a/scrapegraphai/graphs/__init__.py
+++ b/scrapegraphai/graphs/__init__.py
@ -15,4 +15,4 @@ from .csv_scraper_graph import CSVScraperGraph
 from .pdf_scraper_graph import PDFScraperGraph
 from .omni_scraper_graph import OmniScraperGraph
 from .omni_search_graph import OmniSearchGraph
-from .multiple_search_graph import MultipleSearchGraph
+from .smart_scraper_multi_graph import SmartScraperMultiGraph
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@ -7,10 +7,11 @@ from langchain_aws import BedrockEmbeddings
 from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
 from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
 from langchain_google_genai import GoogleGenerativeAIEmbeddings
-from ..helpers import models_tokens
-from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic
 from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

+from ..helpers import models_tokens
+from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic, DeepSeek
+

 class AbstractGraph(ABC):
    """
@ -19,6 +20,7 @@ class AbstractGraph(ABC):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client,
                        configured for generating embeddings.
@ -29,6 +31,7 @@ class AbstractGraph(ABC):
        prompt (str): The prompt for the graph.
        config (dict): Configuration parameters for the graph.
        source (str, optional): The source of the graph.
+        schema (str, optional): The schema for the graph output.

    Example:
        >>> class MyGraph(AbstractGraph):
@ -40,11 +43,12 @@ class AbstractGraph(ABC):
        >>> result = my_graph.run()
    """

-    def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
+    def __init__(self, prompt: str, config: dict, source: Optional[str] = None, schema: Optional[str] = None):

        self.prompt = prompt
        self.source = source
        self.config = config
+        self.schema = schema
        self.llm_model = self._create_llm(config["llm"], chat=True)
        self.embedder_model = self._create_default_embedder(llm_config=config["llm"]
                                                            ) if "embeddings" not in config else self._create_embedder(
@ -61,14 +65,14 @@ class AbstractGraph(ABC):
        self.headless = True if config is None else config.get(
            "headless", True)
        self.loader_kwargs = config.get("loader_kwargs", {})
-        self.schema = config.get("schema", None)

-        common_params = {"headless": self.headless,
-                         "verbose": self.verbose,
-                         "loader_kwargs": self.loader_kwargs,
-                         "llm_model": self.llm_model,
-                         "embedder_model": self.embedder_model,
-                         "schema": self.schema}
+        common_params = {
+            "headless": self.headless,
+            "verbose": self.verbose,
+            "loader_kwargs": self.loader_kwargs,
+            "llm_model": self.llm_model,
+            "embedder_model": self.embedder_model
+            }
        
        self.set_common_params(common_params, overwrite=False)

--- a/scrapegraphai/graphs/csv_scraper_graph.py
+++ b/scrapegraphai/graphs/csv_scraper_graph.py
@ -1,14 +1,18 @@
 """
 Module for creating the smart scraper
 """
+
+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerCSVNode
 )
-from .abstract_graph import AbstractGraph


 class CSVScraperGraph(AbstractGraph):
@ -17,11 +21,11 @@ class CSVScraperGraph(AbstractGraph):
    information from web pages using a natural language model to interpret and answer prompts.
    """

-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
        """
        Initializes the CSVScraperGraph with a prompt, source, and configuration.
        """
-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)

        self.input_key = "csv" if source.endswith("csv") else "csv_dir"

@ -53,6 +57,7 @@ class CSVScraperGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
+                "schema": self.schema,
            }
        )

--- a/scrapegraphai/graphs/deep_scraper_graph.py
+++ b/scrapegraphai/graphs/deep_scraper_graph.py
@ -2,7 +2,11 @@
 DeepScraperGraph Module
 """

+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    SearchLinkNode,
@ -12,7 +16,6 @@ from ..nodes import (
    GraphIteratorNode,
    MergeAnswersNode
 )
-from .abstract_graph import AbstractGraph


 class DeepScraperGraph(AbstractGraph):
@ -30,15 +33,19 @@ class DeepScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
        verbose (bool): A flag indicating whether to show print statements during execution.
        headless (bool): A flag indicating whether to run the graph in headless mode.
+        
    Args:
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.
+
    Example:
        >>> deep_scraper = DeepScraperGraph(
        ...     "List me all the job titles and detailed job description.",
@ -49,8 +56,10 @@ class DeepScraperGraph(AbstractGraph):
        )
    """

-    def __init__(self, prompt: str, source: str, config: dict):
-        super().__init__(prompt, config, source)
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
+    
+        super().__init__(prompt, config, source, schema)
+
        self.input_key = "url" if source.startswith("http") else "local_dir"

    def _create_repeated_graph(self) -> BaseGraph:
@ -84,7 +93,8 @@ class DeepScraperGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )
        search_node = SearchLinkNode(
@ -108,6 +118,7 @@ class DeepScraperGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )

--- a/scrapegraphai/graphs/json_scraper_graph.py
+++ b/scrapegraphai/graphs/json_scraper_graph.py
@ -2,14 +2,17 @@
 JSONScraperGraph Module
 """

+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode
 )
-from .abstract_graph import AbstractGraph


 class JSONScraperGraph(AbstractGraph):
@ -20,6 +23,7 @@ class JSONScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -30,6 +34,7 @@ class JSONScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.

    Example:
        >>> json_scraper = JSONScraperGraph(
@ -40,8 +45,8 @@ class JSONScraperGraph(AbstractGraph):
        >>> result = json_scraper.run()
    """

-    def __init__(self, prompt: str, source: str, config: dict):
-        super().__init__(prompt, config, source)
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
+        super().__init__(prompt, config, source, schema)

        self.input_key = "json" if source.endswith("json") else "json_dir"

@ -76,7 +81,8 @@ class JSONScraperGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )

--- a/scrapegraphai/graphs/omni_scraper_graph.py
+++ b/scrapegraphai/graphs/omni_scraper_graph.py
@ -2,7 +2,11 @@
 OmniScraperGraph Module
 """

+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    ParseNode,
@ -10,8 +14,8 @@ from ..nodes import (
    RAGNode,
    GenerateAnswerOmniNode
 )
-from scrapegraphai.models import OpenAIImageToText
-from .abstract_graph import AbstractGraph
+
+from ..models import OpenAIImageToText


 class OmniScraperGraph(AbstractGraph):
@ -24,6 +28,7 @@ class OmniScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -35,6 +40,7 @@ class OmniScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.

    Example:
        >>> omni_scraper = OmniScraperGraph(
@ -46,11 +52,11 @@ class OmniScraperGraph(AbstractGraph):
        )
    """

-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):

        self.max_images = 5 if config is None else config.get("max_images", 5)

-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)

        self.input_key = "url" if source.startswith("http") else "local_dir"
        
@ -96,7 +102,8 @@ class OmniScraperGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )

--- a/scrapegraphai/graphs/omni_search_graph.py
+++ b/scrapegraphai/graphs/omni_search_graph.py
@ -3,15 +3,17 @@ OmniSearchGraph Module
 """

 from copy import copy, deepcopy
+from typing import Optional

 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+from .omni_scraper_graph import OmniScraperGraph
+
 from ..nodes import (
    SearchInternetNode,
    GraphIteratorNode,
    MergeAnswersNode
 )
-from .abstract_graph import AbstractGraph
-from .omni_scraper_graph import OmniScraperGraph


 class OmniSearchGraph(AbstractGraph):
@ -31,6 +33,7 @@ class OmniSearchGraph(AbstractGraph):
    Args:
        prompt (str): The user prompt to search the internet.
        config (dict): Configuration parameters for the graph.
+        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> omni_search_graph = OmniSearchGraph(
@ -40,7 +43,7 @@ class OmniSearchGraph(AbstractGraph):
        >>> result = search_graph.run()
    """

-    def __init__(self, prompt: str, config: dict):
+    def __init__(self, prompt: str, config: dict, schema: Optional[str] = None):

        self.max_results = config.get("max_results", 3)

@ -49,7 +52,7 @@ class OmniSearchGraph(AbstractGraph):
        else:
            self.copy_config = deepcopy(config)

-        super().__init__(prompt, config)
+        super().__init__(prompt, config, schema=schema)  # keyword: the 3rd positional parameter of AbstractGraph.__init__ is `source`

    def _create_graph(self) -> BaseGraph:
        """
@ -94,6 +97,7 @@ class OmniSearchGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )

--- a/scrapegraphai/graphs/pdf_scraper_graph.py
+++ b/scrapegraphai/graphs/pdf_scraper_graph.py
@ -2,14 +2,17 @@
 PDFScraperGraph Module
 """

+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode
 )
-from .abstract_graph import AbstractGraph


 class PDFScraperGraph(AbstractGraph):
@ -21,6 +24,7 @@ class PDFScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -32,6 +36,7 @@ class PDFScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.

    Example:
        >>> pdf_scraper = PDFScraperGraph(
@ -42,8 +47,8 @@ class PDFScraperGraph(AbstractGraph):
        >>> result = pdf_scraper.run()
    """

-    def __init__(self, prompt: str, source: str, config: dict):
-        super().__init__(prompt, config, source)
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
+        super().__init__(prompt, config, source, schema)

        self.input_key = "pdf" if source.endswith("pdf") else "pdf_dir"

@ -79,6 +84,7 @@ class PDFScraperGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
+                "schema": self.schema,
            }
        )

--- a/scrapegraphai/graphs/script_creator_graph.py
+++ b/scrapegraphai/graphs/script_creator_graph.py
@ -2,13 +2,16 @@
 ScriptCreatorGraph Module
 """

+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    ParseNode,
    GenerateScraperNode
 )
-from .abstract_graph import AbstractGraph


 class ScriptCreatorGraph(AbstractGraph):
@ -19,6 +22,7 @@ class ScriptCreatorGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -31,6 +35,7 @@ class ScriptCreatorGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.

    Example:
        >>> script_creator = ScriptCreatorGraph(
@ -41,11 +46,11 @@ class ScriptCreatorGraph(AbstractGraph):
        >>> result = script_creator.run()
    """

-    def __init__(self, prompt: str, source: str, config: dict):
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):

        self.library = config['library']

-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)

        self.input_key = "url" if source.startswith("http") else "local_dir"

@ -65,14 +70,16 @@ class ScriptCreatorGraph(AbstractGraph):
            input="doc",
            output=["parsed_doc"],
            node_config={"chunk_size": self.model_token,
-                         "verbose": self.verbose,
                         "parse_html": False
                         }
        )
        generate_scraper_node = GenerateScraperNode(
            input="user_prompt & (doc)",
            output=["answer"],
-            node_config={"llm_model": self.llm_model},
+            node_config={
+                "llm_model": self.llm_model,
+                "schema": self.schema,
+            },
            library=self.library,
            website=self.source
        )
--- a/scrapegraphai/graphs/search_graph.py
+++ b/scrapegraphai/graphs/search_graph.py
@ -3,15 +3,17 @@ SearchGraph Module
 """

 from copy import copy, deepcopy
+from typing import Optional

 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+from .smart_scraper_graph import SmartScraperGraph
+
 from ..nodes import (
    SearchInternetNode,
    GraphIteratorNode,
    MergeAnswersNode
 )
-from .abstract_graph import AbstractGraph
-from .smart_scraper_graph import SmartScraperGraph


 class SearchGraph(AbstractGraph):
@ -30,6 +32,7 @@ class SearchGraph(AbstractGraph):
    Args:
        prompt (str): The user prompt to search the internet.
        config (dict): Configuration parameters for the graph.
+        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> search_graph = SearchGraph(
@ -39,7 +42,7 @@ class SearchGraph(AbstractGraph):
        >>> result = search_graph.run()
    """

-    def __init__(self, prompt: str, config: dict):
+    def __init__(self, prompt: str, config: dict, schema: Optional[str] = None):

        self.max_results = config.get("max_results", 3)

@ -48,7 +51,7 @@ class SearchGraph(AbstractGraph):
        else:
            self.copy_config = deepcopy(config)

-        super().__init__(prompt, config)
+        super().__init__(prompt, config, schema=schema)  # keyword: the 3rd positional parameter of AbstractGraph.__init__ is `source`

    def _create_graph(self) -> BaseGraph:
        """
@ -93,6 +96,7 @@ class SearchGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )

--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@ -2,14 +2,17 @@
 SmartScraperGraph Module
 """

+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode
 )
-from .abstract_graph import AbstractGraph


 class SmartScraperGraph(AbstractGraph):
@ -22,6 +25,7 @@ class SmartScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (str): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -32,6 +36,7 @@ class SmartScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> smart_scraper = SmartScraperGraph(
@ -43,8 +48,8 @@ class SmartScraperGraph(AbstractGraph):
        )
    """

-    def __init__(self, prompt: str, source: str, config: dict):
-        super().__init__(prompt, config, source)
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
+        super().__init__(prompt, config, source, schema)

        self.input_key = "url" if source.startswith("http") else "local_dir"

@ -82,7 +87,7 @@ class SmartScraperGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
-                "schema": self.config.get("schema", None),
+                "schema": self.schema,
            }
        )

--- a/scrapegraphai/graphs/smart_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_multi_graph.py
@ -1,25 +1,25 @@
 """ 
-MultipleSearchGraph Module
+SmartScraperMultiGraph Module
 """

 from copy import copy, deepcopy
+from typing import List, Optional

 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+from .smart_scraper_graph import SmartScraperGraph
+
 from ..nodes import (
    GraphIteratorNode,
    MergeAnswersNode,
    KnowledgeGraphNode
 )
-from .abstract_graph import AbstractGraph
-from .smart_scraper_graph import SmartScraperGraph
-
-from typing import List, Optional


-class MultipleSearchGraph(AbstractGraph):
+class SmartScraperMultiGraph(AbstractGraph):
    """ 
-    MultipleSearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.
-    It only requires a user prompt to search the internet and generate an answer.
+    SmartScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
+    It only requires a user prompt and a list of URLs.

    Attributes:
        prompt (str): The user prompt to search the internet.
@ -31,7 +31,9 @@ class MultipleSearchGraph(AbstractGraph):

    Args:
        prompt (str): The user prompt to search the internet.
+        source (List[str]): The list of URLs to scrape.
        config (dict): Configuration parameters for the graph.
+        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> search_graph = MultipleSearchGraph(
@ -41,7 +43,7 @@ class MultipleSearchGraph(AbstractGraph):
        >>> result = search_graph.run()
    """

-    def __init__(self, prompt: str, source: List[str], config: dict):
+    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):

        self.max_results = config.get("max_results", 3)

@ -50,7 +52,7 @@ class MultipleSearchGraph(AbstractGraph):
        else:
            self.copy_config = deepcopy(config)

-        super().__init__(prompt, config, source)
+        super().__init__(prompt, config, source, schema)

    def _create_graph(self) -> BaseGraph:
        """
@ -87,15 +89,7 @@ class MultipleSearchGraph(AbstractGraph):
            output=["answer"],
            node_config={
                "llm_model": self.llm_model,
-                "schema": self.config.get("schema", None),
-            }
-        )
-
-        knowledge_graph_node = KnowledgeGraphNode(
-            input="user_prompt & answer",
-            output=["kg"],
-            node_config={
-                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )

@ -103,11 +97,9 @@ class MultipleSearchGraph(AbstractGraph):
            nodes=[
                graph_iterator_node,
                merge_answers_node,
-                knowledge_graph_node
            ],
            edges=[
                (graph_iterator_node, merge_answers_node),
-                (merge_answers_node, knowledge_graph_node)
            ],
            entry_point=graph_iterator_node
        )
--- a/scrapegraphai/graphs/speech_graph.py
+++ b/scrapegraphai/graphs/speech_graph.py
@ -2,9 +2,11 @@
 SpeechGraph Module
 """

-from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
-from ..models import OpenAITextToSpeech
+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    ParseNode,
@ -12,7 +14,9 @@ from ..nodes import (
    GenerateAnswerNode,
    TextToSpeechNode,
 )
-from .abstract_graph import AbstractGraph
+
+from ..utils.save_audio_from_bytes import save_audio_from_bytes
+from ..models import OpenAITextToSpeech


 class SpeechGraph(AbstractGraph):
@ -23,6 +27,7 @@ class SpeechGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (Optional[str]): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, configured for generating embeddings.
        verbose (bool): A flag indicating whether to show print statements during execution.
@ -33,6 +38,7 @@ class SpeechGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> speech_graph = SpeechGraph(
@ -41,8 +47,8 @@ class SpeechGraph(AbstractGraph):
        ...     {"llm": {"model": "gpt-3.5-turbo"}}
    """

-    def __init__(self, prompt: str, source: str, config: dict):
-        super().__init__(prompt, config, source)
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
+        super().__init__(prompt, config, source, schema)

        self.input_key = "url" if source.startswith("http") else "local_dir"

@ -76,7 +82,8 @@ class SpeechGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )
        text_to_speech_node = TextToSpeechNode(
--- a/scrapegraphai/graphs/xml_scraper_graph.py
+++ b/scrapegraphai/graphs/xml_scraper_graph.py
@ -2,14 +2,17 @@
 XMLScraperGraph Module
 """

+from typing import Optional
+
 from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
 from ..nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode
 )
-from .abstract_graph import AbstractGraph


 class XMLScraperGraph(AbstractGraph):
@ -21,6 +24,7 @@ class XMLScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (Optional[str]): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -32,6 +36,7 @@ class XMLScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
+        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> xml_scraper = XMLScraperGraph(
@ -42,8 +47,8 @@ class XMLScraperGraph(AbstractGraph):
        >>> result = xml_scraper.run()
    """

-    def __init__(self, prompt: str, source: str, config: dict):
-        super().__init__(prompt, config, source)
+    def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
+        super().__init__(prompt, config, source, schema)

        self.input_key = "xml" if source.endswith("xml") else "xml_dir"

@ -78,7 +83,8 @@ class XMLScraperGraph(AbstractGraph):
            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
            output=["answer"],
            node_config={
-                "llm_model": self.llm_model
+                "llm_model": self.llm_model,
+                "schema": self.schema
            }
        )

--- a/scrapegraphai/nodes/conditional_node.py
+++ b/scrapegraphai/nodes/conditional_node.py
@ -1,6 +1,7 @@
 """ 
 Module for implementing the conditional node
 """
+
 from .base_node import BaseNode


--- a/scrapegraphai/nodes/generate_answer_csv_node.py
+++ b/scrapegraphai/nodes/generate_answer_csv_node.py
@ -10,10 +10,9 @@ from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.runnables import RunnableParallel

-from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv
-
 # Imports from the library
 from .base_node import BaseNode
+from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv


 class GenerateAnswerCSVNode(BaseNode):
--- a/scrapegraphai/nodes/generate_answer_node.py
+++ b/scrapegraphai/nodes/generate_answer_node.py
@ -15,6 +15,7 @@ from langchain_core.runnables import RunnableParallel
 from .base_node import BaseNode
 from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_with_schema, template_no_chunks_with_schema

+
 class GenerateAnswerNode(BaseNode):
    """
    A node that generates an answer using a large language model (LLM) based on the user's input
--- a/scrapegraphai/nodes/generate_answer_omni_node.py
+++ b/scrapegraphai/nodes/generate_answer_omni_node.py
@ -15,6 +15,7 @@ from langchain_core.runnables import RunnableParallel
 from .base_node import BaseNode
 from ..helpers.generate_answer_node_omni_prompts import template_no_chunk_omni, template_chunks_omni, template_merge_omni

+
 class GenerateAnswerOmniNode(BaseNode):
    """
    A node that generates an answer using a large language model (LLM) based on the user's input
--- a/scrapegraphai/nodes/generate_answer_pdf_node.py
+++ b/scrapegraphai/nodes/generate_answer_pdf_node.py
@ -14,6 +14,7 @@ from langchain_core.runnables import RunnableParallel
 from .base_node import BaseNode
 from ..helpers.generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf

+
 class GenerateAnswerPDFNode(BaseNode):
    """
    A node that generates an answer using a language model (LLM) based on the user's input
--- a/scrapegraphai/nodes/graph_iterator_node.py
+++ b/scrapegraphai/nodes/graph_iterator_node.py
@ -10,7 +10,6 @@ from tqdm.asyncio import tqdm

 from .base_node import BaseNode

-
 _default_batchsize = 16


--- a/scrapegraphai/nodes/knowledge_graph_node.py
+++ b/scrapegraphai/nodes/knowledge_graph_node.py
@ -14,6 +14,7 @@ from langchain_core.output_parsers import JsonOutputParser
 from .base_node import BaseNode
 from ..utils import create_graph, create_interactive_graph

+
 class KnowledgeGraphNode(BaseNode):
    """
    A node responsible for generating a knowledge graph from a dictionary.
--- a/scrapegraphai/nodes/parse_node.py
+++ b/scrapegraphai/nodes/parse_node.py
@ -3,8 +3,10 @@ ParseNode Module
 """

 from typing import List, Optional
+
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_transformers import Html2TextTransformer
+
 from .base_node import BaseNode


--- a/scrapegraphai/nodes/rag_node.py
+++ b/scrapegraphai/nodes/rag_node.py
@ -3,6 +3,7 @@ RAGNode Module
 """

 from typing import List, Optional
+
 from langchain.docstore.document import Document
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
--- a/scrapegraphai/nodes/robots_node.py
+++ b/scrapegraphai/nodes/robots_node.py
@ -4,9 +4,11 @@ RobotsNode Module

 from typing import List, Optional
 from urllib.parse import urlparse
+
 from langchain_community.document_loaders import AsyncChromiumLoader
 from langchain.prompts import PromptTemplate
 from langchain.output_parsers import CommaSeparatedListOutputParser
+
 from .base_node import BaseNode
 from ..helpers import robots_dictionary

--- a/scrapegraphai/nodes/search_internet_node.py
+++ b/scrapegraphai/nodes/search_internet_node.py
@ -3,8 +3,10 @@ SearchInternetNode Module
 """

 from typing import List, Optional
+
 from langchain.output_parsers import CommaSeparatedListOutputParser
 from langchain.prompts import PromptTemplate
+
 from ..utils.research_web import search_on_web
 from .base_node import BaseNode

--- a/scrapegraphai/nodes/search_link_node.py
+++ b/scrapegraphai/nodes/search_link_node.py
@ -6,7 +6,6 @@ SearchLinkNode Module
 from typing import List, Optional
 from tqdm import tqdm

-
 # Imports from Langchain
 from langchain.prompts import PromptTemplate
 from langchain_core.output_parsers import JsonOutputParser