Merge branch 'pre/beta' into main

2026-06-23 21:00:30 +08:00 · 2024-06-25 10:47:15 +02:00 · 2024-06-25 10:47:15 +02:00 · 0dfa5bf07e
commit 0dfa5bf07e
parent f75e0835fc ec77ff7ea4
26 changed files with 239 additions and 93 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -39,6 +39,7 @@
 ## [1.7.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.6.1...v1.7.0) (2024-06-17)


+
 ### Features

 * add caching ([d790361](https://github.com/VinciGit00/Scrapegraph-ai/commit/d79036149a3197a385b73553f29df66d36480c38))
@ -136,6 +137,7 @@
 * **release:** 1.7.0-beta.8 [skip ci] ([a87702f](https://github.com/VinciGit00/Scrapegraph-ai/commit/a87702f107f3fd16ee73e1af1585cd763788bf46))
 * **release:** 1.7.0-beta.9 [skip ci] ([0c5d6e2](https://github.com/VinciGit00/Scrapegraph-ai/commit/0c5d6e2c82b9ee81c91cd2325948bb5a4eddcb31))

+
 ## [1.7.0-beta.12](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.7.0-beta.11...v1.7.0-beta.12) (2024-06-17)


--- a/examples/ernie/smart_scraper_schema_ernie.py
+++ b/examples/ernie/smart_scraper_schema_ernie.py
@ -2,32 +2,31 @@
 Basic example of scraping pipeline using SmartScraper with schema
 """

-import os, json
+import json
+import os
+from typing import Dict
+
 from dotenv import load_dotenv
+from pydantic import BaseModel
+
 from scrapegraphai.graphs import SmartScraperGraph

+
 load_dotenv()

 # ************************************************
 # Define the output schema for the graph
 # ************************************************

-schema= """
-    { 
-    "Projects": [
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            }, 
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            } 
-        ] 
-    } 
-"""
+
+class Project(BaseModel):
+    title: str
+    description: str
+
+
+class Projects(BaseModel):
+    Projects: Dict[str, Project]
+

 # ************************************************
 # Define the configuration for the graph
@ -37,7 +36,7 @@ openai_key = os.getenv("OPENAI_APIKEY")

 graph_config = {
    "llm": {
-        "api_key":openai_key,
+        "api_key": openai_key,
        "model": "gpt-3.5-turbo",
    },
    "verbose": True,
@ -51,8 +50,8 @@ graph_config = {
 smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
-    schema=schema,
-    config=graph_config
+    schema=Projects,
+    config=graph_config,
 )

 result = smart_scraper_graph.run()
--- a/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py
+++ b/examples/huggingfacehub/smart_scraper_schema_huggingfacehub.py
@ -4,6 +4,9 @@ Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key

 import os
 from dotenv import load_dotenv
+from typing import Dict
+
+from pydantic import BaseModel
 from scrapegraphai.graphs import SmartScraperGraph
 from scrapegraphai.utils import prettify_exec_info
 from langchain_community.llms import HuggingFaceEndpoint
@ -13,22 +16,12 @@ from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
 # Define the output schema for the graph
 # ************************************************

-schema= """
-    { 
-    "Projects": [
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            }, 
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            } 
-        ] 
-    } 
-"""
+class Project(BaseModel):
+    title: str
+    description: str
+
+class Projects(BaseModel):
+    Projects: Dict[str, Project]

 ## required environment variable in .env
 #HUGGINGFACEHUB_API_TOKEN
@ -61,7 +54,7 @@ graph_config = {
 smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
-    schema=schema,
+    schema=Projects,
    config=graph_config
 )
 result = smart_scraper_graph.run()
--- a/examples/mixed_models/smart_scraper_schema_groq_openai.py
+++ b/examples/mixed_models/smart_scraper_schema_groq_openai.py
@ -2,8 +2,13 @@
 Basic example of scraping pipeline using SmartScraper with schema
 """

-import os, json
+import json
+import os
+from typing import Dict, List
+
 from dotenv import load_dotenv
+from pydantic import BaseModel
+
 from scrapegraphai.graphs import SmartScraperGraph
 from scrapegraphai.utils import prettify_exec_info

@ -13,22 +18,12 @@ load_dotenv()
 # Define the output schema for the graph
 # ************************************************

-schema= """
-    { 
-    "Projects": [
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            }, 
-        "Project #": 
-            { 
-                "title": "...", 
-                "description": "...", 
-            } 
-        ] 
-    } 
-"""
+class Project(BaseModel):
+    title: str
+    description: str
+
+class Projects(BaseModel):
+    Projects: Dict[str, Project]

 # ************************************************
 # Define the configuration for the graph
@ -60,7 +55,7 @@ smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
-    schema=schema,
+    schema=Projects,
    config=graph_config
 )

--- a/examples/single_node/search_internet_node.py
+++ b/examples/single_node/search_internet_node.py
@ -0,0 +1,50 @@
+"""
+Example of running a single SearchInternetNode on its own
+"""
+
+from scrapegraphai.models import Ollama
+from scrapegraphai.nodes import SearchInternetNode
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "model": "llama3",
+        "temperature": 0,
+        "streaming": True
+    },
+    "search_engine": "google",
+    "max_results": 3,
+    "verbose": True
+}
+
+# ************************************************
+# Define the node
+# ************************************************
+
+llm_model = Ollama(graph_config["llm"])
+
+search_node = SearchInternetNode(
+    input="user_input",
+    output=["search_results"],
+    node_config={
+        "llm_model": llm_model,
+        "search_engine": graph_config["search_engine"],
+        "max_results": graph_config["max_results"],
+        "verbose": graph_config["verbose"]
+    }
+)
+
+# ************************************************
+# Test the node
+# ************************************************
+
+state = {
+    "user_input": "What is the capital of France?"
+}
+
+result = search_node.execute(state)
+
+print(result)
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@ -39,7 +39,7 @@ class AbstractGraph(ABC):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client,
                        configured for generating embeddings.
--- a/scrapegraphai/graphs/csv_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/csv_scraper_multi_graph.py
@ -5,6 +5,8 @@ CSVScraperMultiGraph Module
 from copy import copy, deepcopy
 from typing import List, Optional

+from pydantic import BaseModel
+
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .csv_scraper_graph import CSVScraperGraph
@ -32,7 +34,7 @@ class CSVScraperMultiGraph(AbstractGraph):
        prompt (str): The user prompt to search the internet.
        source (List[str]): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> search_graph = MultipleSearchGraph(
@ -42,7 +44,7 @@ class CSVScraperMultiGraph(AbstractGraph):
        >>> result = search_graph.run()
    """

-    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
+    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):

        self.max_results = config.get("max_results", 3)

--- a/scrapegraphai/graphs/deep_scraper_graph.py
+++ b/scrapegraphai/graphs/deep_scraper_graph.py
@ -34,7 +34,7 @@ class DeepScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -45,7 +45,7 @@ class DeepScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.

    Example:
        >>> deep_scraper = DeepScraperGraph(
--- a/scrapegraphai/graphs/json_scraper_graph.py
+++ b/scrapegraphai/graphs/json_scraper_graph.py
@ -23,7 +23,7 @@ class JSONScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -34,7 +34,7 @@ class JSONScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.

    Example:
        >>> json_scraper = JSONScraperGraph(
--- a/scrapegraphai/graphs/json_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/json_scraper_multi_graph.py
@ -33,7 +33,7 @@ class JSONScraperMultiGraph(AbstractGraph):
        prompt (str): The user prompt to search the internet.
        source (List[str]): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> search_graph = MultipleSearchGraph(
--- a/scrapegraphai/graphs/omni_scraper_graph.py
+++ b/scrapegraphai/graphs/omni_scraper_graph.py
@ -29,7 +29,7 @@ class OmniScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -41,7 +41,7 @@ class OmniScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.

    Example:
        >>> omni_scraper = OmniScraperGraph(
--- a/scrapegraphai/graphs/omni_search_graph.py
+++ b/scrapegraphai/graphs/omni_search_graph.py
@ -34,7 +34,7 @@ class OmniSearchGraph(AbstractGraph):
    Args:
        prompt (str): The user prompt to search the internet.
        config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> omni_search_graph = OmniSearchGraph(
--- a/scrapegraphai/graphs/pdf_scraper_graph.py
+++ b/scrapegraphai/graphs/pdf_scraper_graph.py
@ -26,7 +26,7 @@ class PDFScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -38,7 +38,7 @@ class PDFScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.

    Example:
        >>> pdf_scraper = PDFScraperGraph(
--- a/scrapegraphai/graphs/pdf_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/pdf_scraper_multi_graph.py
@ -34,7 +34,7 @@ class PdfScraperMultiGraph(AbstractGraph):
        prompt (str): The user prompt to search the internet.
        source (List[str]): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> search_graph = MultipleSearchGraph(
--- a/scrapegraphai/graphs/script_creator_graph.py
+++ b/scrapegraphai/graphs/script_creator_graph.py
@ -23,7 +23,7 @@ class ScriptCreatorGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -36,7 +36,7 @@ class ScriptCreatorGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.

    Example:
        >>> script_creator = ScriptCreatorGraph(
--- a/scrapegraphai/graphs/script_creator_multi_graph.py
+++ b/scrapegraphai/graphs/script_creator_multi_graph.py
@ -5,6 +5,8 @@ ScriptCreatorMultiGraph Module
 from copy import copy, deepcopy
 from typing import List, Optional

+from pydantic import BaseModel
+
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
 from .script_creator_graph import ScriptCreatorGraph
@ -30,7 +32,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
        prompt (str): The user prompt to search the internet.
        source (List[str]): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.
    Example:
        >>> script_graph = ScriptCreatorMultiGraph(
        ...     "What is Chioggia famous for?",
@ -41,7 +43,7 @@ class ScriptCreatorMultiGraph(AbstractGraph):
        >>> result = script_graph.run()
    """

-    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
+    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):

        self.max_results = config.get("max_results", 3)

--- a/scrapegraphai/graphs/search_graph.py
+++ b/scrapegraphai/graphs/search_graph.py
@ -35,7 +35,7 @@ class SearchGraph(AbstractGraph):
    Args:
        prompt (str): The user prompt to search the internet.
        config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> search_graph = SearchGraph(
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@ -26,7 +26,7 @@ class SmartScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -37,7 +37,7 @@ class SmartScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.

    Example:
        >>> smart_scraper = SmartScraperGraph(
--- a/scrapegraphai/graphs/smart_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_multi_graph.py
@ -33,7 +33,7 @@ class SmartScraperMultiGraph(AbstractGraph):
        prompt (str): The user prompt to search the internet.
        source (List[str]): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> search_graph = MultipleSearchGraph(
--- a/scrapegraphai/graphs/speech_graph.py
+++ b/scrapegraphai/graphs/speech_graph.py
@ -28,7 +28,7 @@ class SpeechGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, configured for generating embeddings.
        verbose (bool): A flag indicating whether to show print statements during execution.
@ -39,7 +39,7 @@ class SpeechGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.

    Example:
        >>> speech_graph = SpeechGraph(
--- a/scrapegraphai/graphs/xml_scraper_graph.py
+++ b/scrapegraphai/graphs/xml_scraper_graph.py
@ -24,7 +24,7 @@ class XMLScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.
        llm_model: An instance of a language model client, configured for generating answers.
        embedder_model: An instance of an embedding model client, 
        configured for generating embeddings.
@ -36,7 +36,7 @@ class XMLScraperGraph(AbstractGraph):
        prompt (str): The prompt for the graph.
        source (str): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (str): The schema for the graph output.
+        schema (BaseModel): The schema for the graph output.

    Example:
        >>> xml_scraper = XMLScraperGraph(
--- a/scrapegraphai/graphs/xml_scraper_multi_graph.py
+++ b/scrapegraphai/graphs/xml_scraper_multi_graph.py
@ -34,7 +34,7 @@ class XMLScraperMultiGraph(AbstractGraph):
        prompt (str): The user prompt to search the internet.
        source (List[str]): The source of the graph.
        config (dict): Configuration parameters for the graph.
-        schema (Optional[str]): The schema for the graph output.
+        schema (Optional[BaseModel]): The schema for the graph output.

    Example:
        >>> search_graph = MultipleSearchGraph(
--- a/scrapegraphai/nodes/search_internet_node.py
+++ b/scrapegraphai/nodes/search_internet_node.py
@ -43,6 +43,7 @@ class SearchInternetNode(BaseNode):
        self.verbose = (
            False if node_config is None else node_config.get("verbose", False)
        )
+        self.search_engine = node_config.get("search_engine", "google")
        self.max_results = node_config.get("max_results", 3)

    def execute(self, state: dict) -> dict:
@ -97,7 +98,8 @@ class SearchInternetNode(BaseNode):

        self.logger.info(f"Search Query: {search_query}")

-        answer = search_on_web(query=search_query, max_results=self.max_results)
+        answer = search_on_web(query=search_query, max_results=self.max_results,
+                               search_engine=self.search_engine)

        if len(answer) == 0:
            # raise an exception if no answer is found
--- a/scrapegraphai/utils/research_web.py
+++ b/scrapegraphai/utils/research_web.py
@ -1,11 +1,12 @@
 """
-Module for making the request on the web
+Module for searching the web with Google, DuckDuckGo, or Bing
 """
 import re
 from typing import List
 from langchain_community.tools import DuckDuckGoSearchResults
 from googlesearch import search as google_search
-
+import requests
+from bs4 import BeautifulSoup

 def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
    """
@ -13,34 +14,48 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =

    Args:
        query (str): The search query to find on the internet.
-        search_engine (str, optional): Specifies the search engine to use, options include 'Google' or 'DuckDuckGo'. Default is 'Google'.
+        search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', or 'Bing'. Default is 'Google'.
        max_results (int, optional): The maximum number of search results to return.

    Returns:
        List[str]: A list of URLs as strings that are the search results.

    Raises:
-        ValueError: If the search engine specified is neither 'Google' nor 'DuckDuckGo'.
+        ValueError: If the search engine specified is not 'Google', 'DuckDuckGo', or 'Bing'.

    Example:
        >>> search_on_web("example query", search_engine="Google", max_results=5)
        ['http://example.com', 'http://example.org', ...]

-    This function allows switching between Google and DuckDuckGo to perform internet searches, returning a list of result URLs.
+    This function allows switching between Google, DuckDuckGo, and Bing to perform 
+    internet searches, returning a list of result URLs.
    """

    if search_engine.lower() == "google":
        res = []
-
        for url in google_search(query, stop=max_results):
            res.append(url)
        return res
+
    elif search_engine.lower() == "duckduckgo":
        research = DuckDuckGoSearchResults(max_results=max_results)
        res = research.run(query)
-
        links = re.findall(r'https?://[^\s,\]]+', res)
-
        return links
-    raise ValueError(
-        "The only search engines available are DuckDuckGo or Google")
+
+    elif search_engine.lower() == "bing":
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+        search_url = f"https://www.bing.com/search?q={query}"
+        response = requests.get(search_url, headers=headers)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        search_results = []
+        for result in soup.find_all('li', class_='b_algo', limit=max_results):
+            link = result.find('a')['href']
+            search_results.append(link)
+        return search_results
+
+    raise ValueError("The only search engines available are DuckDuckGo, Google, or Bing")
--- a/tests/nodes/search_internet_node_test.py
+++ b/tests/nodes/search_internet_node_test.py
@ -0,0 +1,58 @@
+import unittest
+from scrapegraphai.models import Ollama
+from scrapegraphai.nodes import SearchInternetNode
+
+class TestSearchInternetNode(unittest.TestCase):
+
+    def setUp(self):
+        # Configuration for the graph
+        self.graph_config = {
+            "llm": {
+                "model": "llama3",
+                "temperature": 0,
+                "streaming": True
+            },
+            "search_engine": "google",
+            "max_results": 3,
+            "verbose": True
+        }
+
+        # Define the model
+        self.llm_model = Ollama(self.graph_config["llm"])
+
+        # Initialize the SearchInternetNode
+        self.search_node = SearchInternetNode(
+            input="user_input",
+            output=["search_results"],
+            node_config={
+                "llm_model": self.llm_model,
+                "search_engine": self.graph_config["search_engine"],
+                "max_results": self.graph_config["max_results"],
+                "verbose": self.graph_config["verbose"]
+            }
+        )
+
+    def test_execute_search_node(self):
+        # Initial state
+        state = {
+            "user_input": "What is the capital of France?"
+        }
+
+        # Expected output
+        expected_output = {
+            "user_input": "What is the capital of France?",
+            "search_results": [
+                "https://en.wikipedia.org/wiki/Paris",
+                "https://en.wikipedia.org/wiki/France",
+                "https://en.wikipedia.org/wiki/%C3%8Ele-de-France"
+            ]
+        }
+
+        # Execute the node
+        result = self.search_node.execute(state)
+
+        # Assert the results
+        self.assertEqual(result, expected_output)
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tests/utils/research_web_test.py
+++ b/tests/utils/research_web_test.py
@ -0,0 +1,28 @@
+import pytest
+from scrapegraphai.utils.research_web import search_on_web
+
+
+def test_google_search():
+    """Tests search_on_web with Google search engine."""
+    results = search_on_web("test query", search_engine="Google", max_results=2)
+    assert len(results) == 2
+    # search_on_web returns only URLs; checking relevance to 'test query' would require fetching each result page
+
+def test_bing_search():
+    """Tests search_on_web with Bing search engine."""
+    results = search_on_web("test query", search_engine="Bing", max_results=1)
+    assert results is not None
+    # You can further assert if the results contain '.com' or '.org' in the domain
+
+
+def test_invalid_search_engine():
+    """Tests search_on_web with invalid search engine."""
+    with pytest.raises(ValueError):
+        search_on_web("test query", search_engine="Yahoo", max_results=5)
+
+
+def test_max_results():
+    """Tests search_on_web with different max_results values."""
+    results_5 = search_on_web("test query", max_results=5)
+    results_10 = search_on_web("test query", max_results=10)
+    assert len(results_5) <= len(results_10)