fix: removed unused DeepScraperGraph and turbo_scraper graph modules and their imports

2026-06-23 21:00:30 +08:00 · 2024-05-15 15:51:27 +02:00 · 2024-05-15 15:51:27 +02:00 · 5587a64d23
commit 5587a64d23
parent 7ced0d0564
3 changed files with 0 additions and 264 deletions
--- a/scrapegraphai/graphs/__init__.py
+++ b/scrapegraphai/graphs/__init__.py
@@ -5,7 +5,6 @@ __init__.py file for graphs folder
 from .abstract_graph import AbstractGraph
 from .base_graph import BaseGraph
 from .smart_scraper_graph import SmartScraperGraph
-from .deep_scraper_graph import DeepScraperGraph
 from .speech_graph import SpeechGraph
 from .search_graph import SearchGraph
 from .script_creator_graph import ScriptCreatorGraph
@@ -15,4 +14,3 @@ from .csv_scraper_graph import CSVScraperGraph
 from .pdf_scraper_graph import PDFScraperGraph
 from .omni_scraper_graph import OmniScraperGraph
 from .omni_search_graph import OmniSearchGraph
-from .turbo_scraper import TurboScraperGraph
--- a/scrapegraphai/graphs/deep_scraper_graph.py
+++ b/scrapegraphai/graphs/deep_scraper_graph.py
@@ -1,116 +0,0 @@
-"""
-DeepScraperGraph Module
-"""
-
-from .base_graph import BaseGraph
-from ..nodes import (
-    FetchNode,
-    SearchLinkNode,
-    ParseNode,
-    RAGNode,
-    GenerateAnswerNode
-)
-from .abstract_graph import AbstractGraph
-
-
-class DeepScraperGraph(AbstractGraph):
-    """
-    [WIP]
-
-    DeepScraper is a scraping pipeline that automates the process of 
-    extracting information from web pages
-    using a natural language model to interpret and answer prompts.
-
-    Unlike SmartScraper, DeepScraper can navigate to the links within the input webpage,
-    to fuflfil the task within the prompt.
-
-    
-    Attributes:
-        prompt (str): The prompt for the graph.
-        source (str): The source of the graph.
-        config (dict): Configuration parameters for the graph.
-        llm_model: An instance of a language model client, configured for generating answers.
-        embedder_model: An instance of an embedding model client, 
-        configured for generating embeddings.
-        verbose (bool): A flag indicating whether to show print statements during execution.
-        headless (bool): A flag indicating whether to run the graph in headless mode.
-    Args:
-        prompt (str): The prompt for the graph.
-        source (str): The source of the graph.
-        config (dict): Configuration parameters for the graph.
-    Example:
-        >>> deep_scraper = DeepScraperGraph(
-        ...     "List me all the job titles and detailed job description.",
-        ...     "https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India",
-        ...     {"llm": {"model": "gpt-3.5-turbo"}}
-        ... )
-        >>> result = deep_scraper.run()
-        )
-    """
-
-    def __init__(self, prompt: str, source: str, config: dict):
-        super().__init__(prompt, config, source)
-
-        self.input_key = "url" if source.startswith("http") else "local_dir"
-
-    def _create_graph(self) -> BaseGraph:
-        """
-        Creates the graph of nodes representing the workflow for web scraping.
-        Returns:
-            BaseGraph: A graph instance representing the web scraping workflow.
-        """
-        fetch_node = FetchNode(
-            input="url | local_dir",
-            output=["doc", "link_urls", "img_urls"]
-        )
-        parse_node = ParseNode(
-            input="doc",
-            output=["parsed_doc"],
-            node_config={
-                "chunk_size": self.model_token
-            }
-        )
-        rag_node = RAGNode(
-            input="user_prompt & (parsed_doc | doc)",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
-        search_node = SearchLinkNode(
-            input="user_prompt & relevant_chunks",
-            output=["relevant_links"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
-
-        return BaseGraph(
-            nodes=[
-                fetch_node,
-                parse_node,
-                rag_node,
-                search_node
-            ],
-            edges=[
-                (fetch_node, parse_node),
-                (parse_node, rag_node),
-                (rag_node, search_node)
-
-            ],
-            entry_point=fetch_node
-        )
-
-    def run(self) -> str:
-        """
-        Executes the scraping process and returns the answer to the prompt.
-        Returns:
-            str: The answer to the prompt.
-        """
-
-        inputs = {"user_prompt": self.prompt, self.input_key: self.source}
-        self.final_state, self.execution_info = self.graph.execute(inputs)
-
-        return self.final_state.get("answer", "No answer found.")
--- a/scrapegraphai/graphs/turbo_scraper.py
+++ b/scrapegraphai/graphs/turbo_scraper.py
@@ -1,146 +0,0 @@
-"""
-SmartScraperGraph Module
-"""
-
-from .base_graph import BaseGraph
-from ..nodes import (
-    FetchNode,
-    ParseNode,
-    RAGNode,
-    SearchLinksWithContext,
-    GraphIteratorNode,
-    MergeAnswersNode
-)
-from .search_graph import SearchGraph
-from .abstract_graph import AbstractGraph
-
-
-class SmartScraperGraph(AbstractGraph):
-    """
-    SmartScraper is a scraping pipeline that automates the process of
-    extracting information from web pages
-    using a natural language model to interpret and answer prompts.
-
-    Attributes:
-        prompt (str): The prompt for the graph.
-        source (str): The source of the graph.
-        config (dict): Configuration parameters for the graph.
-        llm_model: An instance of a language model client, configured for generating answers.
-        embedder_model: An instance of an embedding model client,
-        configured for generating embeddings.
-        verbose (bool): A flag indicating whether to show print statements during execution.
-        headless (bool): A flag indicating whether to run the graph in headless mode.
-
-    Args:
-        prompt (str): The prompt for the graph.
-        source (str): The source of the graph.
-        config (dict): Configuration parameters for the graph.
-
-    Example:
-        >>> smart_scraper = SmartScraperGraph(
-        ...     "List me all the attractions in Chioggia.",
-        ...     "https://en.wikipedia.org/wiki/Chioggia",
-        ...     {"llm": {"model": "gpt-3.5-turbo"}}
-        ... )
-        >>> result = smart_scraper.run()
-        )
-    """
-
-    def __init__(self, prompt: str, source: str, config: dict):
-        super().__init__(prompt, config, source)
-
-        self.input_key = "url" if source.startswith("http") else "local_dir"
-
-    def _create_graph(self) -> BaseGraph:
-        """
-        Creates the graph of nodes representing the workflow for web scraping.
-
-        Returns:
-            BaseGraph: A graph instance representing the web scraping workflow.
-        """
-        smart_scraper_graph = SmartScraperGraph(
-            prompt="",
-            source="",
-            config=self.llm_model
-        )
-        fetch_node = FetchNode(
-            input="url | local_dir",
-            output=["doc"]
-        )
-
-        parse_node = ParseNode(
-            input="doc",
-            output=["parsed_doc"],
-            node_config={
-                "chunk_size": self.model_token
-            }
-        )
-
-        rag_node = RAGNode(
-            input="user_prompt & (parsed_doc | doc)",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
-
-        search_link_with_context_node = SearchLinksWithContext(
-            input="user_prompt & (relevant_chunks | parsed_doc | doc)",
-            output=["answer"],
-            node_config={
-                "llm_model": self.llm_model
-            }
-        )
-
-        graph_iterator_node = GraphIteratorNode(
-            input="user_prompt & urls",
-            output=["results"],
-            node_config={
-                "graph_instance": smart_scraper_graph,
-                "verbose": True,
-            }
-        )
-
-        merge_answers_node = MergeAnswersNode(
-            input="user_prompt & results",
-            output=["answer"],
-            node_config={
-                "llm_model": self.llm_model,
-                "verbose": True,
-            }
-        )
-
-        return BaseGraph(
-            nodes=[
-                fetch_node,
-                parse_node,
-                rag_node,
-                search_link_with_context_node,
-                graph_iterator_node,
-                merge_answers_node
-
-            ],
-            edges=[
-                (fetch_node, parse_node),
-                (parse_node, rag_node),
-                (rag_node, search_link_with_context_node),
-                (search_link_with_context_node, graph_iterator_node),
-                (graph_iterator_node, merge_answers_node),
-
-            ],
-            entry_point=fetch_node
-        )
-
-    def run(self) -> str:
-        """
-        Executes the scraping process and returns the answer to the prompt.
-
-        Returns:
-            str: The answer to the prompt.
-        """
-
-        inputs = {"user_prompt": self.prompt, self.input_key: self.source}
-        self.final_state, self.execution_info = self.graph.execute(inputs)
-
-        return self.final_state.get("answer", "No answer found.")