Merge pull request #456 from ScrapeGraphAI/refactoring-of-search_link_node

fix: search link node
2026-07-01 21:00:48 +08:00 · 2024-07-15 23:06:03 +02:00 · 2024-07-15 23:06:03 +02:00 · dccb893fdf
commit dccb893fdf
parent 2fa04b5815 830daee1f3
16 changed files with 683 additions and 49 deletions
--- a/examples/anthropic/search_link_graph_haiku.py
+++ b/examples/anthropic/search_link_graph_haiku.py
@ -0,0 +1,57 @@
+"""
+Example of Search Graph (NOTE: this script builds Azure OpenAI model instances)
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+from langchain_openai import AzureChatOpenAI
+from langchain_openai import AzureOpenAIEmbeddings
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+llm_model_instance = AzureChatOpenAI(
+    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]
+)
+
+embedder_model_instance = AzureOpenAIEmbeddings(
+    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"],
+    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+)
+
+# ************************************************
+# Assemble the graph configuration from the model instances
+# ************************************************
+
+graph_config = {
+    "llm": {"model_instance": llm_model_instance},
+    "embeddings": {"model_instance": embedder_model_instance}
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/azure/search_link_graph_azure.py
+++ b/examples/azure/search_link_graph_azure.py
@ -0,0 +1,52 @@
+"""
+Example of Search Graph (NOTE: configured with a Groq LLM and Ollama embeddings)
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+groq_key = os.getenv("GROQ_APIKEY")
+
+graph_config = {
+    "llm": {
+        "model": "groq/gemma-7b-it",
+        "api_key": groq_key,
+        "temperature": 0
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "headless": False
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/bedrock/search_link_graph_bedrock.py
+++ b/examples/bedrock/search_link_graph_bedrock.py
@ -0,0 +1,45 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "client": "client_name",
+        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+        "temperature": 0.0
+    },
+    "embeddings": {
+        "model": "bedrock/cohere.embed-multilingual-v3"
+    }
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/deepseek/search_link_graph_deepseek.py
+++ b/examples/deepseek/search_link_graph_deepseek.py
@ -0,0 +1,52 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+deepseek_key = os.getenv("DEEPSEEK_APIKEY")
+
+graph_config = {
+    "llm": {
+        "model": "deepseek-chat",
+        "openai_api_key": deepseek_key,
+        "openai_api_base": 'https://api.deepseek.com/v1',
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "verbose": True,
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/ernie/search_graph_ernie.py
+++ b/examples/ernie/search_graph_ernie.py
@ -12,15 +12,18 @@ load_dotenv()
 # Define the configuration for the graph
 # ************************************************

-openai_key = os.getenv("OPENAI_APIKEY")
-
 graph_config = {
    "llm": {
-        "api_key": openai_key,
-        "model": "gpt-3.5-turbo",
-    },
-    "max_results": 2,
-    "verbose": True,
+            "model": "ernie-bot-turbo",
+            "ernie_client_id": "<ernie_client_id>",
+            "ernie_client_secret": "<ernie_client_secret>",
+            "temperature": 0.1
+        },
+        "embeddings": {
+            "model": "ollama/nomic-embed-text",
+            "temperature": 0,
+            "base_url": "http://localhost:11434"},
+    "library": "beautifulsoup"
 }

 # ************************************************
--- a/examples/ernie/search_link_graph_ernie.py
+++ b/examples/ernie/search_link_graph_ernie.py
@ -0,0 +1,46 @@
+"""
+Example of Search Graph
+"""
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+            "model": "ernie-bot-turbo",
+            "ernie_client_id": "<ernie_client_id>",
+            "ernie_client_secret": "<ernie_client_secret>",
+            "temperature": 0.1
+        },
+        "embeddings": {
+            "model": "ollama/nomic-embed-text",
+            "temperature": 0,
+            "base_url": "http://localhost:11434"},
+    "library": "beautifulsoup"
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/fireworks/search_link_graph_fireworks.py
+++ b/examples/fireworks/search_link_graph_fireworks.py
@ -0,0 +1,52 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": fireworks_api_key,
+        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "max_results": 2,
+    "verbose": True,
+    "headless": False,
+}
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/gemini/search_link_graph_gemini.py
+++ b/examples/gemini/search_link_graph_gemini.py
@ -0,0 +1,44 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+gemini_key = os.getenv("GOOGLE_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": gemini_key,
+        "model": "gemini-pro",
+    },
+}
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/groq/search_link_graph_groq.py
+++ b/examples/groq/search_link_graph_groq.py
@ -0,0 +1,52 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+groq_key = os.getenv("GROQ_APIKEY")
+
+graph_config = {
+    "llm": {
+        "model": "groq/gemma-7b-it",
+        "api_key": groq_key,
+        "temperature": 0
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "headless": False
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/groq/smart_scraper_groq.py
+++ b/examples/groq/smart_scraper_groq.py
@ -9,7 +9,6 @@ from scrapegraphai.utils import prettify_exec_info

 load_dotenv()

-
 # ************************************************
 # Define the configuration for the graph
 # ************************************************
--- a/examples/huggingfacehub/search_link_graph_huggingfacehub.py
+++ b/examples/huggingfacehub/search_link_graph_huggingfacehub.py
@ -0,0 +1,54 @@
+"""
+Example of Search Graph
+"""
+import os
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+# ************************************************
+
+HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+
+repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
+
+llm_model_instance = HuggingFaceEndpoint(
+    repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
+)
+
+embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
+    api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
+)
+
+graph_config = {
+    "llm": {"model_instance": llm_model_instance},
+    "embeddings": {"model_instance": embedder_model_instance}
+}
+
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
--- a/examples/local_models/search_link_graph_ollama.py
+++ b/examples/local_models/search_link_graph_ollama.py
@ -0,0 +1,43 @@
+""" 
+Basic example of a link-extraction pipeline using SearchLinkGraph
+"""
+from scrapegraphai.graphs import SearchLinkGraph
+from scrapegraphai.utils import prettify_exec_info
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "model": "ollama/llama3",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "base_url": "http://localhost:11434", # set ollama URL arbitrarily
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "verbose": True,
+    "headless": False
+}
+
+# ************************************************
+# Create the SearchLinkGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SearchLinkGraph(
+    source="https://sport.sky.it/nba?gr=www",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
--- a/scrapegraphai/graphs/init.py
+++ b/scrapegraphai/graphs/init.py
@ -23,3 +23,4 @@ from .xml_scraper_multi_graph import XMLScraperMultiGraph
 from .script_creator_multi_graph import ScriptCreatorMultiGraph
 from .markdown_scraper_graph import MDScraperGraph
 from .markdown_scraper_multi_graph import MDScraperMultiGraph
+from .search_link_graph import SearchLinkGraph
--- a/scrapegraphai/graphs/search_link_graph.py
+++ b/scrapegraphai/graphs/search_link_graph.py
@ -0,0 +1,104 @@
+""" SearchLinkGraph Module """
+from typing import Optional
+import logging
+from pydantic import BaseModel
+from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
+
+from ..nodes import ( FetchNode, ParseNode, SearchLinkNode )
+
+class SearchLinkGraph(AbstractGraph):
+    """
+    SearchLinkGraph is a pipeline that chains FetchNode, ParseNode and SearchLinkNode to gather the links of a source.
+
+    Attributes:
+        prompt (str): Not user-supplied; "" is passed to AbstractGraph.__init__.
+        source (str): The URL or local directory the graph reads from.
+        config (dict): Configuration parameters for the graph.
+        schema (BaseModel): The schema for the graph output.
+        llm_model: A language model client, handed to the fetch and search-link nodes.
+        embedder_model: An instance of an embedding model client, 
+        configured for generating embeddings.
+        input_key (str): "url" when source starts with "http", otherwise "local_dir".
+        verbose (bool): A flag indicating whether to show print statements during execution.
+        headless (bool): A flag indicating whether to run the graph in headless mode.
+
+    Args:
+        source (str): The URL or local directory the graph reads from.
+        config (dict): Configuration parameters for the graph.
+        schema (BaseModel, optional): The schema for the graph output. Defaults to None.
+
+    Example:
+        >>> search_link_graph = SearchLinkGraph(
+        ...     source="https://en.wikipedia.org/wiki/Chioggia",
+        ...     config={"llm": {"model": "gpt-3.5-turbo"}}
+        ... )
+        >>> result = search_link_graph.run()
+    """
+
+    def __init__(self, source: str, config: dict, schema: Optional[BaseModel] = None):
+        super().__init__("", config, source, schema)
+        # Pick the state key that run() uses to hand the source to the graph.
+        self.input_key = "url" if source.startswith("http") else "local_dir"
+
+    def _create_graph(self) -> BaseGraph:
+        """
+        Builds the fetch -> parse -> search-link node chain.
+
+        Returns:
+            BaseGraph: A graph whose entry point is the fetch node.
+        """
+
+        fetch_node = FetchNode(
+            input="url| local_dir",
+            output=["doc", "link_urls", "img_urls"],
+            node_config={
+                "llm_model": self.llm_model,
+                "force": self.config.get("force", False),
+                "cut": self.config.get("cut", True),
+                "loader_kwargs": self.config.get("loader_kwargs", {}),
+            }
+        )
+        parse_node = ParseNode(
+            input="doc",
+            output=["parsed_doc"],
+            node_config={
+                "chunk_size": self.model_token
+            }
+        )
+        search_link_node = SearchLinkNode(
+            input="doc",
+            output=["parsed_doc"],
+            node_config={
+                "llm_model": self.llm_model,
+                "chunk_size": self.model_token
+            }
+        )
+        # NOTE(review): parse_node and search_link_node both declare "parsed_doc" as their output key.
+        return BaseGraph(
+            nodes=[
+                fetch_node,
+                parse_node,
+                search_link_node
+            ],
+            edges=[
+                (fetch_node, parse_node),
+                (parse_node, search_link_node)
+            ],
+            entry_point=fetch_node,
+            graph_name=self.__class__.__name__
+        )
+
+    def run(self) -> str:
+        """
+        Executes the graph on the configured source.
+
+        Returns:
+            The "parsed_doc" entry of the final state, or "No answer found." if it is missing.
+        """
+
+        inputs = {"user_prompt": self.prompt, self.input_key: self.source}
+        self.final_state, self.execution_info = self.graph.execute(inputs)
+
+        return self.final_state.get("parsed_doc", "No answer found.")
--- a/scrapegraphai/nodes/search_link_node.py
+++ b/scrapegraphai/nodes/search_link_node.py
@ -4,6 +4,7 @@ SearchLinkNode Module

 # Imports from standard library
 from typing import List, Optional
+import re
 from tqdm import tqdm

 # Imports from Langchain
@ -20,7 +21,7 @@ from .base_node import BaseNode
 class SearchLinkNode(BaseNode):
    """
    A node that can filter out the relevant links in the webpage content for the user prompt.
-    Node expects the aleready scrapped links on the webpage and hence it is expected
+    Node expects the already scrapped links on the webpage and hence it is expected
    that this node be used after the FetchNode.

    Attributes:
@ -67,39 +68,10 @@ class SearchLinkNode(BaseNode):

        self.logger.info(f"--- Executing {self.node_name} Node ---")

-        # Interpret input keys based on the provided input expression
-        input_keys = self.get_input_keys(state)

-        user_prompt = state[input_keys[0]]
-        parsed_content_chunks = state[input_keys[1]]
+        parsed_content_chunks = state.get("doc")
        output_parser = JsonOutputParser()

-        prompt_relevant_links = """
-            You are a website scraper and you have just scraped the following content from a website.
-            Content: {content}
-            
-            You are now tasked with identifying all hyper links within the content that are potentially
-            relevant to the user task: {user_prompt}
-            
-            Assume relevance broadly, including any links that might be related or potentially useful 
-            in relation to the task.
-
-            Sort it in order of importance, the first one should be the most important one, the last one
-            the least important
-            
-            Please list only valid URLs and make sure to err on the side of inclusion if it's uncertain 
-            whether the content at the link is directly relevant.
-
-            Output only a list of relevant links in the format:
-            [
-                "link1",
-                "link2",
-                "link3",
-                .
-                .
-                .
-            ]
-            """
        relevant_links = []

        for i, chunk in enumerate(
@ -109,15 +81,47 @@ class SearchLinkNode(BaseNode):
                disable=not self.verbose,
            )
        ):
-            merge_prompt = PromptTemplate(
-                template=prompt_relevant_links,
-                input_variables=["content", "user_prompt"],
-            )
-            merge_chain = merge_prompt | self.llm_model | output_parser
-            # merge_chain = merge_prompt | self.llm_model
-            answer = merge_chain.invoke(
-                {"content": chunk.page_content, "user_prompt": user_prompt}
-            )
-            relevant_links += answer
+            try:
+                # Primary approach: Regular expression to extract links
+                links = re.findall(r'https?://[^\s"<>\]]+', str(chunk.page_content))
+
+                relevant_links += links
+            except Exception as e:
+                # Fallback approach: Using the LLM to extract links
+                self.logger.error(f"Error extracting links: {e}. Falling back to LLM.")
+                prompt_relevant_links = """
+                    You are a website scraper and you have just scraped the following content from a website.
+                    Content: {content}
+                    
+                    Assume relevance broadly, including any links that might be related or potentially useful 
+                    in relation to the task.
+
+                    Sort it in order of importance, the first one should be the most important one, the last one
+                    the least important
+                    
+                    Please list only valid URLs and make sure to err on the side of inclusion if it's uncertain 
+                    whether the content at the link is directly relevant.
+
+                    Output only a list of relevant links in the format:
+                    [
+                        "link1",
+                        "link2",
+                        "link3",
+                        .
+                        .
+                        .
+                    ]
+                    """
+                
+                merge_prompt = PromptTemplate(
+                    template=prompt_relevant_links,
+                    input_variables=["content", "user_prompt"],
+                )
+                merge_chain = merge_prompt | self.llm_model | output_parser
+                answer = merge_chain.invoke(
+                    {"content": chunk.page_content}
+                )
+                relevant_links += answer
+
        state.update({self.output[0]: relevant_links})
        return state
--- a/tests/graphs/search_link_ollama.py
+++ b/tests/graphs/search_link_ollama.py
@ -0,0 +1,26 @@
+from scrapegraphai.graphs import SearchLinkGraph
+from scrapegraphai.utils import prettify_exec_info
+
+def test_smart_scraper_pipeline():
+    graph_config = {
+        "llm": {
+            "model": "ollama/llama3",
+            "temperature": 0,
+            "format": "json",
+        },
+        "embeddings": {
+            "model": "ollama/nomic-embed-text",
+            "temperature": 0,
+        },
+        "verbose": True,
+        "headless": False
+    }
+
+    smart_scraper_graph = SearchLinkGraph(
+        source="https://sport.sky.it/nba?gr=www",
+        config=graph_config
+    )
+
+    result = smart_scraper_graph.run()
+    # NOTE(review): run() falls back to a default string, so this mainly checks that the run completed.
+    assert result is not None