From d3e63d91be79f74e8a3fdb00e692d546c24cead5 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Fri, 12 Jul 2024 11:49:12 +0000
Subject: [PATCH 01/20] ci(release): 1.9.0-beta.3 [skip ci]

## [1.9.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.2...v1.9.0-beta.3) (2024-07-12)

### Bug Fixes

* solve a burr integration ([881290b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/881290b5066b39c505532656671fbf65f8fc312c))
---
 CHANGELOG.md   | 7 +++++++
 pyproject.toml | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 63eb6250..ac4e94f0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## [1.9.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.2...v1.9.0-beta.3) (2024-07-12)
+
+
+### Bug Fixes
+
+* solve a burr integration ([881290b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/881290b5066b39c505532656671fbf65f8fc312c))
+
 ## [1.9.0-beta.2](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.1...v1.9.0-beta.2) (2024-07-05)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 30dad8df..2de923c9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scrapegraphai"
 
 
-version = "1.9.0b2"
+version = "1.9.0b3"
 
 
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."

From a7249685cb2b133beeea439d1337cb1adeb64acd Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Sun, 14 Jul 2024 10:24:38 +0200
Subject: [PATCH 02/20] removed rag node

---
 scrapegraphai/graphs/csv_scraper_graph.py        | 14 ++------------
 scrapegraphai/graphs/json_scraper_graph.py       | 14 ++------------
 scrapegraphai/graphs/markdown_scraper_graph.py   | 14 ++------------
 scrapegraphai/graphs/omni_scraper_graph.py       | 16 +++-------------
 scrapegraphai/graphs/pdf_scraper_graph.py        | 13 +------------
 scrapegraphai/graphs/smart_scraper_graph.py      | 13 ++-----------
 scrapegraphai/graphs/xml_scraper_graph.py        | 14 ++------------
 scrapegraphai/nodes/generate_answer_csv_node.py  |  4 ++--
 scrapegraphai/nodes/generate_answer_node.py      |  4 ++--
 scrapegraphai/nodes/generate_answer_omni_node.py |  4 ++--
 scrapegraphai/nodes/generate_answer_pdf_node.py  |  2 +-
 11 files changed, 21 insertions(+), 91 deletions(-)

diff --git a/scrapegraphai/graphs/csv_scraper_graph.py b/scrapegraphai/graphs/csv_scraper_graph.py
index ea205bb3..f4efd1fb 100644
--- a/scrapegraphai/graphs/csv_scraper_graph.py
+++ b/scrapegraphai/graphs/csv_scraper_graph.py
@@ -10,7 +10,6 @@ from .abstract_graph import AbstractGraph
 
 from ..nodes import (
     FetchNode,
-    RAGNode,
     GenerateAnswerCSVNode
 )
 
@@ -37,14 +36,7 @@ class CSVScraperGraph(AbstractGraph):
             input="csv | csv_dir",
             output=["doc"],
         )
-        rag_node = RAGNode(
-            input="user_prompt & doc",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model,
-            }
-        )
+     
         generate_answer_node = GenerateAnswerCSVNode(
             input="user_prompt & (relevant_chunks | doc)",
             output=["answer"],
@@ -58,12 +50,10 @@ class CSVScraperGraph(AbstractGraph):
         return BaseGraph(
             nodes=[
                 fetch_node,
-                rag_node,
                 generate_answer_node,
             ],
             edges=[
-                (fetch_node, rag_node),
-                (rag_node, generate_answer_node)
+                (fetch_node, generate_answer_node)
             ],
             entry_point=fetch_node,
             graph_name=self.__class__.__name__
diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py
index b85a34dc..fe54ebec 100644
--- a/scrapegraphai/graphs/json_scraper_graph.py
+++ b/scrapegraphai/graphs/json_scraper_graph.py
@@ -10,7 +10,6 @@ from .abstract_graph import AbstractGraph
 
 from ..nodes import (
     FetchNode,
-    RAGNode,
     GenerateAnswerNode
 )
 
@@ -62,14 +61,7 @@ class JSONScraperGraph(AbstractGraph):
             input="json | json_dir",
             output=["doc", "link_urls", "img_urls"],
         )
-        rag_node = RAGNode(
-            input="user_prompt & (parsed_doc | doc)",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
+     
         generate_answer_node = GenerateAnswerNode(
             input="user_prompt & (relevant_chunks | parsed_doc | doc)",
             output=["answer"],
@@ -83,12 +75,10 @@ class JSONScraperGraph(AbstractGraph):
         return BaseGraph(
             nodes=[
                 fetch_node,
-                rag_node,
                 generate_answer_node,
             ],
             edges=[
-                (fetch_node, rag_node),
-                (rag_node, generate_answer_node)
+                (fetch_node, generate_answer_node)
             ],
             entry_point=fetch_node,
             graph_name=self.__class__.__name__
diff --git a/scrapegraphai/graphs/markdown_scraper_graph.py b/scrapegraphai/graphs/markdown_scraper_graph.py
index 66b161dc..c177facd 100644
--- a/scrapegraphai/graphs/markdown_scraper_graph.py
+++ b/scrapegraphai/graphs/markdown_scraper_graph.py
@@ -3,7 +3,7 @@ import logging
 from pydantic import BaseModel
 from .base_graph import BaseGraph
 from .abstract_graph import AbstractGraph
-from ..nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode
+from ..nodes import FetchNode, ParseNode, GenerateAnswerNode
 
 class MDScraperGraph(AbstractGraph):
     """
@@ -63,14 +63,6 @@ class MDScraperGraph(AbstractGraph):
                 "chunk_size": self.model_token
             }
         )
-        rag_node = RAGNode(
-            input="user_prompt & (parsed_doc | doc)",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
         generate_answer_node = GenerateAnswerNode(
             input="user_prompt & (relevant_chunks | parsed_doc | doc)",
             output=["answer"],
@@ -86,13 +78,11 @@ class MDScraperGraph(AbstractGraph):
             nodes=[
                 fetch_node,
                 parse_node,
-                rag_node,
                 generate_answer_node,
             ],
             edges=[
                 (fetch_node, parse_node),
-                (parse_node, rag_node),
-                (rag_node, generate_answer_node)
+                (parse_node, generate_answer_node)
             ],
             entry_point=fetch_node,
             graph_name=self.__class__.__name__
diff --git a/scrapegraphai/graphs/omni_scraper_graph.py b/scrapegraphai/graphs/omni_scraper_graph.py
index 7e34dab7..1965dc04 100644
--- a/scrapegraphai/graphs/omni_scraper_graph.py
+++ b/scrapegraphai/graphs/omni_scraper_graph.py
@@ -12,7 +12,6 @@ from ..nodes import (
     FetchNode,
     ParseNode,
     ImageToTextNode,
-    RAGNode,
     GenerateAnswerOmniNode
 )
 
@@ -89,14 +88,7 @@ class OmniScraperGraph(AbstractGraph):
                 "max_images": self.max_images
             }
         )
-        rag_node = RAGNode(
-            input="user_prompt & (parsed_doc | doc)",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
+      
         generate_answer_omni_node = GenerateAnswerOmniNode(
             input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc",
             output=["answer"],
@@ -112,14 +104,12 @@ class OmniScraperGraph(AbstractGraph):
                 fetch_node,
                 parse_node,
                 image_to_text_node,
-                rag_node,
                 generate_answer_omni_node,
             ],
             edges=[
                 (fetch_node, parse_node),
                 (parse_node, image_to_text_node),
-                (image_to_text_node, rag_node),
-                (rag_node, generate_answer_omni_node)
+                (image_to_text_node, generate_answer_omni_node)
             ],
             entry_point=fetch_node,
             graph_name=self.__class__.__name__
@@ -136,4 +126,4 @@ class OmniScraperGraph(AbstractGraph):
         inputs = {"user_prompt": self.prompt, self.input_key: self.source}
         self.final_state, self.execution_info = self.graph.execute(inputs)
 
-        return self.final_state.get("answer", "No answer found.")
\ No newline at end of file
+        return self.final_state.get("answer", "No answer found.")
diff --git a/scrapegraphai/graphs/pdf_scraper_graph.py b/scrapegraphai/graphs/pdf_scraper_graph.py
index 732b4789..049425d0 100644
--- a/scrapegraphai/graphs/pdf_scraper_graph.py
+++ b/scrapegraphai/graphs/pdf_scraper_graph.py
@@ -12,7 +12,6 @@ from .abstract_graph import AbstractGraph
 from ..nodes import (
     FetchNode,
     ParseNode,
-    RAGNode,
     GenerateAnswerPDFNode
 )
 
@@ -76,14 +75,6 @@ class PDFScraperGraph(AbstractGraph):
             }
         )
 
-        rag_node = RAGNode(
-            input="user_prompt & (parsed_doc | doc)",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
         generate_answer_node_pdf = GenerateAnswerPDFNode(
             input="user_prompt & (relevant_chunks | doc)",
             output=["answer"],
@@ -98,13 +89,11 @@ class PDFScraperGraph(AbstractGraph):
             nodes=[
                 fetch_node,
                 parse_node,
-                rag_node,
                 generate_answer_node_pdf,
             ],
             edges=[
                 (fetch_node, parse_node),
-                (parse_node, rag_node),
-                (rag_node, generate_answer_node_pdf)
+                (parse_node, generate_answer_node_pdf)
             ],
             entry_point=fetch_node,
             graph_name=self.__class__.__name__
diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py
index ba27b60e..7862f88f 100644
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@@ -78,14 +78,7 @@ class SmartScraperGraph(AbstractGraph):
                 "chunk_size": self.model_token
             }
         )
-        rag_node = RAGNode(
-            input="user_prompt & (parsed_doc | doc)",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
+
         generate_answer_node = GenerateAnswerNode(
             input="user_prompt & (relevant_chunks | parsed_doc | doc)",
             output=["answer"],
@@ -100,13 +93,11 @@ class SmartScraperGraph(AbstractGraph):
             nodes=[
                 fetch_node,
                 parse_node,
-                rag_node,
                 generate_answer_node,
             ],
             edges=[
                 (fetch_node, parse_node),
-                (parse_node, rag_node),
-                (rag_node, generate_answer_node)
+                (parse_node, generate_answer_node)
             ],
             entry_point=fetch_node,
             graph_name=self.__class__.__name__
diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py
index 28c58bb2..24b1ff0d 100644
--- a/scrapegraphai/graphs/xml_scraper_graph.py
+++ b/scrapegraphai/graphs/xml_scraper_graph.py
@@ -10,7 +10,6 @@ from .abstract_graph import AbstractGraph
 
 from ..nodes import (
     FetchNode,
-    RAGNode,
     GenerateAnswerNode
 )
 
@@ -64,14 +63,7 @@ class XMLScraperGraph(AbstractGraph):
             input="xml | xml_dir",
             output=["doc", "link_urls", "img_urls"]
         )
-        rag_node = RAGNode(
-            input="user_prompt & doc",
-            output=["relevant_chunks"],
-            node_config={
-                "llm_model": self.llm_model,
-                "embedder_model": self.embedder_model
-            }
-        )
+     
         generate_answer_node = GenerateAnswerNode(
             input="user_prompt & (relevant_chunks | doc)",
             output=["answer"],
@@ -85,12 +77,10 @@ class XMLScraperGraph(AbstractGraph):
         return BaseGraph(
             nodes=[
                 fetch_node,
-                rag_node,
                 generate_answer_node,
             ],
             edges=[
-                (fetch_node, rag_node),
-                (rag_node, generate_answer_node)
+                (fetch_node, generate_answer_node)
             ],
             entry_point=fetch_node,
             graph_name=self.__class__.__name__
diff --git a/scrapegraphai/nodes/generate_answer_csv_node.py b/scrapegraphai/nodes/generate_answer_csv_node.py
index 58adb1d4..6008dbdd 100644
--- a/scrapegraphai/nodes/generate_answer_csv_node.py
+++ b/scrapegraphai/nodes/generate_answer_csv_node.py
@@ -125,7 +125,7 @@ class GenerateAnswerCSVNode(BaseNode):
                     template=template_no_chunks_csv_prompt,
                     input_variables=["question"],
                     partial_variables={
-                        "context": chunk.page_content,
+                        "context": chunk,
                         "format_instructions": format_instructions,
                     },
                 )
@@ -137,7 +137,7 @@ class GenerateAnswerCSVNode(BaseNode):
                     template=template_chunks_csv_prompt,
                     input_variables=["question"],
                     partial_variables={
-                        "context": chunk.page_content,
+                        "context": chunk,
                         "chunk_id": i + 1,
                         "format_instructions": format_instructions,
                     },
diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py
index fabb4e66..3ea8a128 100644
--- a/scrapegraphai/nodes/generate_answer_node.py
+++ b/scrapegraphai/nodes/generate_answer_node.py
@@ -115,7 +115,7 @@ class GenerateAnswerNode(BaseNode):
                 prompt = PromptTemplate(
                     template=template_no_chunks_prompt,
                     input_variables=["question"],
-                    partial_variables={"context": chunk.page_content,
+                    partial_variables={"context": chunk,
                                        "format_instructions": format_instructions})
                 chain =  prompt | self.llm_model | output_parser
                 answer = chain.invoke({"question": user_prompt})
@@ -124,7 +124,7 @@ class GenerateAnswerNode(BaseNode):
                 prompt = PromptTemplate(
                     template=template_chunks_prompt,
                     input_variables=["question"],
-                    partial_variables={"context": chunk.page_content,
+                    partial_variables={"context": chunk,
                                         "chunk_id": i + 1,
                                         "format_instructions": format_instructions})
             # Dynamically name the chains based on their index
diff --git a/scrapegraphai/nodes/generate_answer_omni_node.py b/scrapegraphai/nodes/generate_answer_omni_node.py
index e6ea9206..f5474177 100644
--- a/scrapegraphai/nodes/generate_answer_omni_node.py
+++ b/scrapegraphai/nodes/generate_answer_omni_node.py
@@ -110,7 +110,7 @@ class GenerateAnswerOmniNode(BaseNode):
                     template=template_no_chunk_omni_prompt,
                     input_variables=["question"],
                     partial_variables={
-                        "context": chunk.page_content,
+                        "context": chunk,
                         "format_instructions": format_instructions,
                         "img_desc": imag_desc,
                     },
@@ -123,7 +123,7 @@ class GenerateAnswerOmniNode(BaseNode):
                     template=template_chunks_omni_prompt,
                     input_variables=["question"],
                     partial_variables={
-                        "context": chunk.page_content,
+                        "context": chunk,
                         "chunk_id": i + 1,
                         "format_instructions": format_instructions,
                     },
diff --git a/scrapegraphai/nodes/generate_answer_pdf_node.py b/scrapegraphai/nodes/generate_answer_pdf_node.py
index c6509f34..fac25c06 100644
--- a/scrapegraphai/nodes/generate_answer_pdf_node.py
+++ b/scrapegraphai/nodes/generate_answer_pdf_node.py
@@ -124,7 +124,7 @@ class GenerateAnswerPDFNode(BaseNode):
                     template=template_no_chunks_pdf_prompt,
                     input_variables=["question"],
                     partial_variables={
-                        "context":chunk.page_content,
+                        "context":chunk,
                         "format_instructions": format_instructions,
                     },
                 )

From cf3ab5564ae5c415c63d1771b32ea68f5169ca82 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Sun, 14 Jul 2024 16:49:29 +0200
Subject: [PATCH 03/20] fix: search link node

---
 scrapegraphai/nodes/search_link_node.py | 83 ++++++++++++++-----------
 1 file changed, 46 insertions(+), 37 deletions(-)

diff --git a/scrapegraphai/nodes/search_link_node.py b/scrapegraphai/nodes/search_link_node.py
index 2a0c5f18..8c81d07b 100644
--- a/scrapegraphai/nodes/search_link_node.py
+++ b/scrapegraphai/nodes/search_link_node.py
@@ -4,6 +4,7 @@ SearchLinkNode Module
 
 # Imports from standard library
 from typing import List, Optional
+import re
 from tqdm import tqdm
 
 # Imports from Langchain
@@ -20,7 +21,7 @@ from .base_node import BaseNode
 class SearchLinkNode(BaseNode):
     """
     A node that can filter out the relevant links in the webpage content for the user prompt.
-    Node expects the aleready scrapped links on the webpage and hence it is expected
+    Node expects the already scraped links on the webpage and hence it is expected
     that this node be used after the FetchNode.
 
     Attributes:
@@ -74,32 +75,6 @@ class SearchLinkNode(BaseNode):
         parsed_content_chunks = state[input_keys[1]]
         output_parser = JsonOutputParser()
 
-        prompt_relevant_links = """
-            You are a website scraper and you have just scraped the following content from a website.
-            Content: {content}
-            
-            You are now tasked with identifying all hyper links within the content that are potentially
-            relevant to the user task: {user_prompt}
-            
-            Assume relevance broadly, including any links that might be related or potentially useful 
-            in relation to the task.
-
-            Sort it in order of importance, the first one should be the most important one, the last one
-            the least important
-            
-            Please list only valid URLs and make sure to err on the side of inclusion if it's uncertain 
-            whether the content at the link is directly relevant.
-
-            Output only a list of relevant links in the format:
-            [
-                "link1",
-                "link2",
-                "link3",
-                .
-                .
-                .
-            ]
-            """
         relevant_links = []
 
         for i, chunk in enumerate(
@@ -109,15 +84,49 @@ class SearchLinkNode(BaseNode):
                 disable=not self.verbose,
             )
         ):
-            merge_prompt = PromptTemplate(
-                template=prompt_relevant_links,
-                input_variables=["content", "user_prompt"],
-            )
-            merge_chain = merge_prompt | self.llm_model | output_parser
-            # merge_chain = merge_prompt | self.llm_model
-            answer = merge_chain.invoke(
-                {"content": chunk.page_content, "user_prompt": user_prompt}
-            )
-            relevant_links += answer
+            try:
+                # Primary approach: Regular expression to extract links
+                links = re.findall(r'(https?://\S+)', chunk.page_content)
+                relevant_links += links
+            except Exception as e:
+                # Fallback approach: Using the LLM to extract links
+                self.logger.error(f"Error extracting links: {e}. Falling back to LLM.")
+                prompt_relevant_links = """
+                    You are a website scraper and you have just scraped the following content from a website.
+                    Content: {content}
+                    
+                    You are now tasked with identifying all hyper links within the content that are potentially
+                    relevant to the user task: {user_prompt}
+                    
+                    Assume relevance broadly, including any links that might be related or potentially useful 
+                    in relation to the task.
+
+                    Sort it in order of importance, the first one should be the most important one, the last one
+                    the least important
+                    
+                    Please list only valid URLs and make sure to err on the side of inclusion if it's uncertain 
+                    whether the content at the link is directly relevant.
+
+                    Output only a list of relevant links in the format:
+                    [
+                        "link1",
+                        "link2",
+                        "link3",
+                        .
+                        .
+                        .
+                    ]
+                    """
+                
+                merge_prompt = PromptTemplate(
+                    template=prompt_relevant_links,
+                    input_variables=["content", "user_prompt"],
+                )
+                merge_chain = merge_prompt | self.llm_model | output_parser
+                answer = merge_chain.invoke(
+                    {"content": chunk.page_content, "user_prompt": user_prompt}
+                )
+                relevant_links += answer
+
         state.update({self.output[0]: relevant_links})
         return state

From 7e5789baa300d43deae3711176c24429d03521fd Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Sun, 14 Jul 2024 19:31:22 +0200
Subject: [PATCH 04/20] Update research_web.py

---
 scrapegraphai/utils/research_web.py | 38 +++++++++++++++++------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/scrapegraphai/utils/research_web.py b/scrapegraphai/utils/research_web.py
index ac7fc09d..b439b14d 100644
--- a/scrapegraphai/utils/research_web.py
+++ b/scrapegraphai/utils/research_web.py
@@ -1,6 +1,3 @@
-"""
-research web module
-"""
 import re
 from typing import List
 from langchain_community.tools import DuckDuckGoSearchResults
@@ -8,41 +5,39 @@ from googlesearch import search as google_search
 import requests
 from bs4 import BeautifulSoup
 
-def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
+def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10, port: int = 8080) -> List[str]:
     """
     Searches the web for a given query using specified search engine options.
 
     Args:
         query (str): The search query to find on the internet.
-        search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', or 'Bing'. Default is 'Google'.
+        search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', 'Bing', or 'SearcNGX'. Default is 'Google'.
         max_results (int, optional): The maximum number of search results to return.
+        port (int, optional): The port number to use when searching with 'SearcNGX'. Default is 8080.
 
     Returns:
         List[str]: A list of URLs as strings that are the search results.
 
     Raises:
-        ValueError: If the search engine specified is neither 'Google', 'DuckDuckGo', nor 'Bing'.
+        ValueError: If the search engine specified is not supported.
 
     Example:
         >>> search_on_web("example query", search_engine="Google", max_results=5)
         ['http://example.com', 'http://example.org', ...]
-
-    This function allows switching between Google, DuckDuckGo, and Bing to perform 
-    internet searches, returning a list of result URLs.
     """
-
+    
     if search_engine.lower() == "google":
         res = []
         for url in google_search(query, stop=max_results):
             res.append(url)
         return res
-
+    
     elif search_engine.lower() == "duckduckgo":
         research = DuckDuckGoSearchResults(max_results=max_results)
         res = research.run(query)
         links = re.findall(r'https?://[^\s,\]]+', res)
         return links
-
+    
     elif search_engine.lower() == "bing":
         headers = {
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
@@ -51,11 +46,24 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
         response = requests.get(search_url, headers=headers)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, "html.parser")
-
+    
         search_results = []
         for result in soup.find_all('li', class_='b_algo', limit=max_results):
             link = result.find('a')['href']
             search_results.append(link)
         return search_results
-
-    raise ValueError("The only search engines available are DuckDuckGo, Google, or Bing")
+    
+    elif search_engine.lower() == "searcngx":
+        url = f"http://localhost:{port}"
+        params = {"q": query, "format": "json"}
+    
+        # Send the GET request to the server
+        response = requests.get(url, params=params)
+    
+        # Parse the response and limit to the specified max_results
+        data = response.json()
+        limited_results = data["results"][:max_results]
+        return limited_results
+    
+    else:
+        raise ValueError("The only search engines available are DuckDuckGo, Google, Bing, or SearcNGX")

From 5c9218608140bf694fbfd96aa90276bc438bb475 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Sun, 14 Jul 2024 19:32:18 +0200
Subject: [PATCH 05/20] feat: add searchngx integration


From fd1b7cb24a7c252277607abde35826e3c58e34ef Mon Sep 17 00:00:00 2001
From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com>
Date: Sun, 14 Jul 2024 22:36:43 +0200
Subject: [PATCH 06/20] chore: remove unused import

---
 scrapegraphai/graphs/smart_scraper_graph.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py
index 7862f88f..cb4777a8 100644
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@@ -11,7 +11,6 @@ from .abstract_graph import AbstractGraph
 from ..nodes import (
     FetchNode,
     ParseNode,
-    RAGNode,
     GenerateAnswerNode
 )
 

From 7ba2f6ae0b9d2e9336e973e1f57ab8355c739e27 Mon Sep 17 00:00:00 2001
From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com>
Date: Sun, 14 Jul 2024 22:39:34 +0200
Subject: [PATCH 07/20] chore: correct search engine name

---
 scrapegraphai/utils/research_web.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scrapegraphai/utils/research_web.py b/scrapegraphai/utils/research_web.py
index b439b14d..101693e4 100644
--- a/scrapegraphai/utils/research_web.py
+++ b/scrapegraphai/utils/research_web.py
@@ -11,9 +11,9 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
 
     Args:
         query (str): The search query to find on the internet.
-        search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', 'Bing', or 'SearcNGX'. Default is 'Google'.
+        search_engine (str, optional): Specifies the search engine to use, options include 'Google', 'DuckDuckGo', 'Bing', or 'SearXNG'. Default is 'Google'.
         max_results (int, optional): The maximum number of search results to return.
-        port (int, optional): The port number to use when searching with 'SearcNGX'. Default is 8080.
+        port (int, optional): The port number to use when searching with 'SearXNG'. Default is 8080.
 
     Returns:
         List[str]: A list of URLs as strings that are the search results.
@@ -53,7 +53,7 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
             search_results.append(link)
         return search_results
     
-    elif search_engine.lower() == "searcngx":
+    elif search_engine.lower() == "searxng":
         url = f"http://localhost:{port}"
         params = {"q": query, "format": "json"}
     
@@ -66,4 +66,4 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
         return limited_results
     
     else:
-        raise ValueError("The only search engines available are DuckDuckGo, Google, Bing, or SearcNGX")
+        raise ValueError("The only search engines available are DuckDuckGo, Google, Bing, or SearXNG")

From 2fa04b58159abf7af890ebc0768fe23d51bf177f Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Sun, 14 Jul 2024 20:57:09 +0000
Subject: [PATCH 08/20] ci(release): 1.9.0-beta.4 [skip ci]

## [1.9.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.3...v1.9.0-beta.4) (2024-07-14)

### Features

* add searchngx integration ([5c92186](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c9218608140bf694fbfd96aa90276bc438bb475))

### chore

* correct search engine name ([7ba2f6a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7ba2f6ae0b9d2e9336e973e1f57ab8355c739e27))
* remove unused import ([fd1b7cb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fd1b7cb24a7c252277607abde35826e3c58e34ef))
---
 CHANGELOG.md   | 13 +++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ac4e94f0..3e50e973 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,16 @@
+## [1.9.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.3...v1.9.0-beta.4) (2024-07-14)
+
+
+### Features
+
+* add searchngx integration ([5c92186](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/5c9218608140bf694fbfd96aa90276bc438bb475))
+
+
+### chore
+
+* correct search engine name ([7ba2f6a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/7ba2f6ae0b9d2e9336e973e1f57ab8355c739e27))
+* remove unused import ([fd1b7cb](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fd1b7cb24a7c252277607abde35826e3c58e34ef))
+
 ## [1.9.0-beta.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.2...v1.9.0-beta.3) (2024-07-12)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 2de923c9..a841a4a0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scrapegraphai"
 
 
-version = "1.9.0b3"
+version = "1.9.0b4"
 
 
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."

From 57fdaf9e3a67e5006ffd5149fbbf1ec468ed16a4 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Mon, 15 Jul 2024 12:36:11 +0200
Subject: [PATCH 09/20] create search_link_graph

---
 .../local_models/search_link_graph_ollama.py  |  43 ++++++++
 scrapegraphai/graphs/__init__.py              |   1 +
 scrapegraphai/graphs/search_link_graph.py     | 104 ++++++++++++++++++
 scrapegraphai/nodes/search_link_node.py       |  15 +--
 4 files changed, 153 insertions(+), 10 deletions(-)
 create mode 100644 examples/local_models/search_link_graph_ollama.py
 create mode 100644 scrapegraphai/graphs/search_link_graph.py

diff --git a/examples/local_models/search_link_graph_ollama.py b/examples/local_models/search_link_graph_ollama.py
new file mode 100644
index 00000000..5c594270
--- /dev/null
+++ b/examples/local_models/search_link_graph_ollama.py
@@ -0,0 +1,43 @@
+""" 
+Basic example of a link-extraction pipeline using SearchLinkGraph
+"""
+from scrapegraphai.graphs import SearchLinkGraph
+from scrapegraphai.utils import prettify_exec_info
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "model": "ollama/llama3",
+        "temperature": 0,
+        "format": "json",  # Ollama needs the format to be specified explicitly
+        # "base_url": "http://localhost:11434", # set ollama URL arbitrarily
+    },
+    "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "verbose": True,
+    "headless": False
+}
+
+# ************************************************
+# Create the SearchLinkGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SearchLinkGraph(
+    source="https://sport.sky.it/nba?gr=www",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
diff --git a/scrapegraphai/graphs/__init__.py b/scrapegraphai/graphs/__init__.py
index b1bf1242..26a0b9e1 100644
--- a/scrapegraphai/graphs/__init__.py
+++ b/scrapegraphai/graphs/__init__.py
@@ -23,3 +23,4 @@ from .xml_scraper_multi_graph import XMLScraperMultiGraph
 from .script_creator_multi_graph import ScriptCreatorMultiGraph
 from .markdown_scraper_graph import MDScraperGraph
 from .markdown_scraper_multi_graph import MDScraperMultiGraph
+from .search_link_graph import SearchLinkGraph
diff --git a/scrapegraphai/graphs/search_link_graph.py b/scrapegraphai/graphs/search_link_graph.py
new file mode 100644
index 00000000..2e23357c
--- /dev/null
+++ b/scrapegraphai/graphs/search_link_graph.py
@@ -0,0 +1,104 @@
+""" SearchLinkGraph Module """
+from typing import Optional
+import logging
+from pydantic import BaseModel
+from .base_graph import BaseGraph
+from .abstract_graph import AbstractGraph
+
+
+from ..nodes import ( FetchNode, ParseNode, SearchLinkNode )
+
+class SearchLinkGraph(AbstractGraph): 
+    """ 
+    SearchLinkGraph is a scraping pipeline that fetches a source (a URL or a local directory) and collects the hyperlinks found in its content.
+
+    Attributes:
+        prompt (str): Always the empty string; this graph does not take a user prompt.
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+        schema (BaseModel): The schema for the graph output.
+        llm_model: An instance of a language model client, configured for generating answers.
+        embedder_model: An instance of an embedding model client, 
+        configured for generating embeddings.
+        verbose (bool): A flag indicating whether to show print statements during execution.
+        headless (bool): A flag indicating whether to run the graph in headless mode.
+
+    Args:
+        source (str): The source of the graph.
+        config (dict): Configuration parameters for the graph.
+        schema (BaseModel, optional): The schema for the graph output. Defaults to None.
+
+    Example:
+        >>> search_link_graph = SearchLinkGraph(
+        ...     "https://en.wikipedia.org/wiki/Chioggia",
+        ...     {"llm": {"model": "gpt-3.5-turbo"}}
+        ... )
+        >>> result = search_link_graph.run()
+        >>> print(result)
+    """
+
+    def __init__(self, source: str, config: dict, schema: Optional[BaseModel] = None):
+        super().__init__("", config, source, schema)
+
+        self.input_key = "url" if source.startswith("http") else "local_dir"
+
+    def _create_graph(self) -> BaseGraph:
+        """
+        Creates the graph of nodes representing the workflow for web scraping.
+
+        Returns:
+            BaseGraph: A graph instance representing the web scraping workflow.
+        """
+
+        fetch_node = FetchNode(
+            input="url| local_dir",
+            output=["doc", "link_urls", "img_urls"],
+            node_config={
+                "llm_model": self.llm_model,
+                "force": self.config.get("force", False),
+                "cut": self.config.get("cut", True),
+                "loader_kwargs": self.config.get("loader_kwargs", {}),
+            }
+        )
+        parse_node = ParseNode(
+            input="doc",
+            output=["parsed_doc"],
+            node_config={
+                "chunk_size": self.model_token
+            }
+        )
+        search_link_node = SearchLinkNode(
+            input="doc",
+            output=["parsed_doc"],
+            node_config={
+                "llm_model": self.llm_model,
+                "chunk_size": self.model_token
+            }
+        )
+
+        return BaseGraph(
+            nodes=[
+                fetch_node,
+                parse_node,
+                search_link_node
+            ],
+            edges=[
+                (fetch_node, parse_node),
+                (parse_node, search_link_node)
+            ],
+            entry_point=fetch_node,
+            graph_name=self.__class__.__name__
+        )
+
+    def run(self) -> str:
+        """
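+        Executes the graph and returns the links collected from the source.
+
+        Returns:
+            list: The links stored under "parsed_doc" in the final state; the string "No answer found." is returned instead if that key is missing.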
+        """
+
+        inputs = {"user_prompt": self.prompt, self.input_key: self.source}
+        self.final_state, self.execution_info = self.graph.execute(inputs)
+
+        return self.final_state.get("parsed_doc", "No answer found.")
\ No newline at end of file
diff --git a/scrapegraphai/nodes/search_link_node.py b/scrapegraphai/nodes/search_link_node.py
index 8c81d07b..ffc8a71b 100644
--- a/scrapegraphai/nodes/search_link_node.py
+++ b/scrapegraphai/nodes/search_link_node.py
@@ -68,11 +68,8 @@ class SearchLinkNode(BaseNode):
 
         self.logger.info(f"--- Executing {self.node_name} Node ---")
 
-        # Interpret input keys based on the provided input expression
-        input_keys = self.get_input_keys(state)
 
-        user_prompt = state[input_keys[0]]
-        parsed_content_chunks = state[input_keys[1]]
+        parsed_content_chunks = state.get("doc")
         output_parser = JsonOutputParser()
 
         relevant_links = []
@@ -86,7 +83,8 @@ class SearchLinkNode(BaseNode):
         ):
             try:
                 # Primary approach: Regular expression to extract links
-                links = re.findall(r'(https?://\S+)', chunk.page_content)
+                links = re.findall(r'https?://[^\s"<>\]]+', str(chunk.page_content))
+
                 relevant_links += links
             except Exception as e:
                 # Fallback approach: Using the LLM to extract links
@@ -95,9 +93,6 @@ class SearchLinkNode(BaseNode):
                     You are a website scraper and you have just scraped the following content from a website.
                     Content: {content}
                     
-                    You are now tasked with identifying all hyper links within the content that are potentially
-                    relevant to the user task: {user_prompt}
-                    
                     Assume relevance broadly, including any links that might be related or potentially useful 
                     in relation to the task.
 
@@ -124,9 +119,9 @@ class SearchLinkNode(BaseNode):
                 )
                 merge_chain = merge_prompt | self.llm_model | output_parser
                 answer = merge_chain.invoke(
-                    {"content": chunk.page_content, "user_prompt": user_prompt}
+                    {"content": chunk.page_content}
                 )
                 relevant_links += answer
 
         state.update({self.output[0]: relevant_links})
-        return state
+        return state
\ No newline at end of file

From f42d47e2c36d9fb4a4add9235885f98d43e6083c Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Mon, 15 Jul 2024 20:27:44 +0200
Subject: [PATCH 10/20] add examples

---
 examples/anthropic/search_link_graph_haiku.py | 57 +++++++++++++++++++
 examples/azure/search_link_graph_azure.py     | 52 +++++++++++++++++
 examples/bedrock/search_link_graph_bedrock.py | 45 +++++++++++++++
 .../deepseek/search_link_graph_deepseek.py    | 52 +++++++++++++++++
 examples/ernie/search_graph_ernie.py          | 17 +++---
 examples/ernie/search_link_graph_ernie.py     | 46 +++++++++++++++
 .../fireworks/search_link_graph_fireworks.py  | 52 +++++++++++++++++
 examples/gemini/search_link_graph_gemini.py   | 44 ++++++++++++++
 examples/groq/search_link_graph_groq.py       | 52 +++++++++++++++++
 examples/groq/smart_scraper_groq.py           |  1 -
 .../search_link_graph_huggingfacehub.py       | 54 ++++++++++++++++++
 11 files changed, 464 insertions(+), 8 deletions(-)
 create mode 100644 examples/anthropic/search_link_graph_haiku.py
 create mode 100644 examples/azure/search_link_graph_azure.py
 create mode 100644 examples/bedrock/search_link_graph_bedrock.py
 create mode 100644 examples/deepseek/search_link_graph_deepseek.py
 create mode 100644 examples/ernie/search_link_graph_ernie.py
 create mode 100644 examples/fireworks/search_link_graph_fireworks.py
 create mode 100644 examples/gemini/search_link_graph_gemini.py
 create mode 100644 examples/groq/search_link_graph_groq.py
 create mode 100644 examples/huggingfacehub/search_link_graph_huggingfacehub.py

diff --git a/examples/anthropic/search_link_graph_haiku.py b/examples/anthropic/search_link_graph_haiku.py
new file mode 100644
index 00000000..ccfbc1d2
--- /dev/null
+++ b/examples/anthropic/search_link_graph_haiku.py
@@ -0,0 +1,57 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+from langchain_openai import AzureChatOpenAI
+from langchain_openai import AzureOpenAIEmbeddings
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+llm_model_instance = AzureChatOpenAI(
+    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+    azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]
+)
+
+embedder_model_instance = AzureOpenAIEmbeddings(
+    azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"],
+    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
+)
+
+# ************************************************
+# Assemble the graph configuration from the model instances
+# ************************************************
+
+graph_config = {
+    "llm": {"model_instance": llm_model_instance},
+    "embeddings": {"model_instance": embedder_model_instance}
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/azure/search_link_graph_azure.py b/examples/azure/search_link_graph_azure.py
new file mode 100644
index 00000000..f940c2a4
--- /dev/null
+++ b/examples/azure/search_link_graph_azure.py
@@ -0,0 +1,52 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+groq_key = os.getenv("GROQ_APIKEY")
+
+graph_config = {
+    "llm": {
+        "model": "groq/gemma-7b-it",
+        "api_key": groq_key,
+        "temperature": 0
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "headless": False
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/bedrock/search_link_graph_bedrock.py b/examples/bedrock/search_link_graph_bedrock.py
new file mode 100644
index 00000000..116dea01
--- /dev/null
+++ b/examples/bedrock/search_link_graph_bedrock.py
@@ -0,0 +1,45 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "client": "client_name",
+        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
+        "temperature": 0.0
+    },
+    "embeddings": {
+        "model": "bedrock/cohere.embed-multilingual-v3"
+    }
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/deepseek/search_link_graph_deepseek.py b/examples/deepseek/search_link_graph_deepseek.py
new file mode 100644
index 00000000..30e4a9b3
--- /dev/null
+++ b/examples/deepseek/search_link_graph_deepseek.py
@@ -0,0 +1,52 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+deepseek_key = os.getenv("DEEPSEEK_APIKEY")
+
+graph_config = {
+    "llm": {
+        "model": "deepseek-chat",
+        "openai_api_key": deepseek_key,
+        "openai_api_base": 'https://api.deepseek.com/v1',
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "verbose": True,
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/ernie/search_graph_ernie.py b/examples/ernie/search_graph_ernie.py
index 22802c6e..c04d9f9b 100644
--- a/examples/ernie/search_graph_ernie.py
+++ b/examples/ernie/search_graph_ernie.py
@@ -12,15 +12,18 @@ load_dotenv()
 # Define the configuration for the graph
 # ************************************************
 
-openai_key = os.getenv("OPENAI_APIKEY")
-
 graph_config = {
     "llm": {
-        "api_key": openai_key,
-        "model": "gpt-3.5-turbo",
-    },
-    "max_results": 2,
-    "verbose": True,
+            "model": "ernie-bot-turbo",
+            "ernie_client_id": "<ernie_client_id>",
+            "ernie_client_secret": "<ernie_client_secret>",
+            "temperature": 0.1
+        },
+        "embeddings": {
+            "model": "ollama/nomic-embed-text",
+            "temperature": 0,
+            "base_url": "http://localhost:11434"},
+    "library": "beautifulsoup"
 }
 
 # ************************************************
diff --git a/examples/ernie/search_link_graph_ernie.py b/examples/ernie/search_link_graph_ernie.py
new file mode 100644
index 00000000..466b230c
--- /dev/null
+++ b/examples/ernie/search_link_graph_ernie.py
@@ -0,0 +1,46 @@
+"""
+Example of Search Graph
+"""
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+            "model": "ernie-bot-turbo",
+            "ernie_client_id": "<ernie_client_id>",
+            "ernie_client_secret": "<ernie_client_secret>",
+            "temperature": 0.1
+        },
+        "embeddings": {
+            "model": "ollama/nomic-embed-text",
+            "temperature": 0,
+            "base_url": "http://localhost:11434"},
+    "library": "beautifulsoup"
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/fireworks/search_link_graph_fireworks.py b/examples/fireworks/search_link_graph_fireworks.py
new file mode 100644
index 00000000..a1d3a979
--- /dev/null
+++ b/examples/fireworks/search_link_graph_fireworks.py
@@ -0,0 +1,52 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": fireworks_api_key,
+        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "max_results": 2,
+    "verbose": True,
+    "headless": False,
+}
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/gemini/search_link_graph_gemini.py b/examples/gemini/search_link_graph_gemini.py
new file mode 100644
index 00000000..937038bd
--- /dev/null
+++ b/examples/gemini/search_link_graph_gemini.py
@@ -0,0 +1,44 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+gemini_key = os.getenv("GOOGLE_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": gemini_key,
+        "model": "gemini-pro",
+    },
+}
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/groq/search_link_graph_groq.py b/examples/groq/search_link_graph_groq.py
new file mode 100644
index 00000000..f940c2a4
--- /dev/null
+++ b/examples/groq/search_link_graph_groq.py
@@ -0,0 +1,52 @@
+"""
+Example of Search Graph
+"""
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+load_dotenv()
+
+groq_key = os.getenv("GROQ_APIKEY")
+
+graph_config = {
+    "llm": {
+        "model": "groq/gemma-7b-it",
+        "api_key": groq_key,
+        "temperature": 0
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "headless": False
+}
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/groq/smart_scraper_groq.py b/examples/groq/smart_scraper_groq.py
index c1a5d319..f828cdec 100644
--- a/examples/groq/smart_scraper_groq.py
+++ b/examples/groq/smart_scraper_groq.py
@@ -9,7 +9,6 @@ from scrapegraphai.utils import prettify_exec_info
 
 load_dotenv()
 
-
 # ************************************************
 # Define the configuration for the graph
 # ************************************************
diff --git a/examples/huggingfacehub/search_link_graph_huggingfacehub.py b/examples/huggingfacehub/search_link_graph_huggingfacehub.py
new file mode 100644
index 00000000..a49fb3b9
--- /dev/null
+++ b/examples/huggingfacehub/search_link_graph_huggingfacehub.py
@@ -0,0 +1,54 @@
+"""
+Example of Search Graph
+"""
+import os
+from scrapegraphai.graphs import SearchGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+# ************************************************
+
+HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
+
+repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
+
+llm_model_instance = HuggingFaceEndpoint(
+    repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
+)
+
+embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
+    api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
+)
+
+graph_config = {
+    "llm": {"model_instance": llm_model_instance},
+    "embeddings": {"model_instance": embedder_model_instance}
+}
+
+
+# ************************************************
+# Create the SearchGraph instance and run it
+# ************************************************
+
+search_graph = SearchGraph(
+    prompt="List me the best escursions near Trento",
+    config=graph_config
+)
+
+result = search_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = search_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json and csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")

From da0b744443ba8955f2336592220f4778d6f15f52 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Mon, 15 Jul 2024 20:46:22 +0200
Subject: [PATCH 11/20] add test

---
 tests/graphs/search_link_ollama.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 tests/graphs/search_link_ollama.py

diff --git a/tests/graphs/search_link_ollama.py b/tests/graphs/search_link_ollama.py
new file mode 100644
index 00000000..3b41f699
--- /dev/null
+++ b/tests/graphs/search_link_ollama.py
@@ -0,0 +1,26 @@
+from scrapegraphai.graphs import SearchLinkGraph
+from scrapegraphai.utils import prettify_exec_info
+
+def test_smart_scraper_pipeline():
+    graph_config = {
+        "llm": {
+            "model": "ollama/llama3",
+            "temperature": 0,
+            "format": "json",
+        },
+        "embeddings": {
+            "model": "ollama/nomic-embed-text",
+            "temperature": 0,
+        },
+        "verbose": True,
+        "headless": False
+    }
+
+    smart_scraper_graph = SearchLinkGraph(
+        source="https://sport.sky.it/nba?gr=www",
+        config=graph_config
+    )
+
+    result = smart_scraper_graph.run()
+
+    assert result is not None

From 830daee1f32c01728bec180787de8808b275a17e Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Mon, 15 Jul 2024 20:47:09 +0200
Subject: [PATCH 12/20] Update search_link_node.py

---
 scrapegraphai/nodes/search_link_node.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scrapegraphai/nodes/search_link_node.py b/scrapegraphai/nodes/search_link_node.py
index ffc8a71b..b3d289d9 100644
--- a/scrapegraphai/nodes/search_link_node.py
+++ b/scrapegraphai/nodes/search_link_node.py
@@ -124,4 +124,4 @@ class SearchLinkNode(BaseNode):
                 relevant_links += answer
 
         state.update({self.output[0]: relevant_links})
-        return state
\ No newline at end of file
+        return state

From bb624399cfc3924825892dd48697fc298ad3b002 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Mon, 15 Jul 2024 21:07:35 +0000
Subject: [PATCH 13/20] ci(release): 1.9.0-beta.5 [skip ci]

## [1.9.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.4...v1.9.0-beta.5) (2024-07-15)

### Bug Fixes

* search link node ([cf3ab55](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cf3ab5564ae5c415c63d1771b32ea68f5169ca82))
---
 CHANGELOG.md   | 7 +++++++
 pyproject.toml | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e50e973..cf1a1e7a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## [1.9.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.4...v1.9.0-beta.5) (2024-07-15)
+
+
+### Bug Fixes
+
+* search link node ([cf3ab55](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cf3ab5564ae5c415c63d1771b32ea68f5169ca82))
+
 ## [1.9.0-beta.4](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.3...v1.9.0-beta.4) (2024-07-14)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index a841a4a0..dc011516 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scrapegraphai"
 
 
-version = "1.9.0b4"
+version = "1.9.0b5"
 
 
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."

From 602dd00209ee1d72a1223fc4793759450921fcf9 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Tue, 16 Jul 2024 12:39:48 +0200
Subject: [PATCH 14/20] feat: refactoring_to_md function

---
 pyproject.toml                       |  1 -
 requirements-dev.lock                | 31 ---------------------------
 requirements.lock                    | 32 ----------------------------
 scrapegraphai/utils/convert_to_md.py |  8 +++----
 tests/utils/convert_to_md_test.py    |  6 +++---
 5 files changed, 6 insertions(+), 72 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index dc011516..59e9ef1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,6 @@ dependencies = [
     "undetected-playwright==0.3.0",
     "semchunk==1.0.1",
     "html2text==2024.2.26",
-    "trafilatura==1.10.0",
     "langchain-fireworks==0.1.3"
 ]
 
diff --git a/requirements-dev.lock b/requirements-dev.lock
index f3d4786c..dcc4744e 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -41,7 +41,6 @@ attrs==23.2.0
     # via jsonschema
     # via referencing
 babel==2.15.0
-    # via courlan
     # via sphinx
 beautifulsoup4==4.12.3
     # via furo
@@ -63,11 +62,8 @@ certifi==2024.2.2
     # via httpcore
     # via httpx
     # via requests
-    # via trafilatura
 charset-normalizer==3.3.2
-    # via htmldate
     # via requests
-    # via trafilatura
 click==8.1.7
     # via burr
     # via streamlit
@@ -75,15 +71,11 @@ click==8.1.7
     # via uvicorn
 contourpy==1.2.1
     # via matplotlib
-courlan==1.2.0
-    # via trafilatura
 cycler==0.12.1
     # via matplotlib
 dataclasses-json==0.6.6
     # via langchain
     # via langchain-community
-dateparser==1.2.0
-    # via htmldate
 defusedxml==0.7.1
     # via langchain-anthropic
 dill==0.3.8
@@ -204,8 +196,6 @@ h11==0.14.0
     # via uvicorn
 html2text==2024.2.26
     # via scrapegraphai
-htmldate==1.8.1
-    # via trafilatura
 httpcore==1.0.5
     # via httpx
 httplib2==0.22.0
@@ -259,8 +249,6 @@ jsonschema==4.22.0
     # via altair
 jsonschema-specifications==2023.12.1
     # via jsonschema
-justext==3.0.1
-    # via trafilatura
 kiwisolver==1.4.5
     # via matplotlib
 langchain==0.1.15
@@ -302,12 +290,6 @@ loguru==0.7.2
     # via burr
 lxml==5.2.2
     # via free-proxy
-    # via htmldate
-    # via justext
-    # via lxml-html-clean
-    # via trafilatura
-lxml-html-clean==0.1.1
-    # via lxml
 markdown-it-py==3.0.0
     # via rich
 markupsafe==2.1.5
@@ -430,9 +412,7 @@ pytest==8.0.0
 pytest-mock==3.14.0
 python-dateutil==2.9.0.post0
     # via botocore
-    # via dateparser
     # via google-cloud-bigquery
-    # via htmldate
     # via matplotlib
     # via pandas
 python-dotenv==1.0.1
@@ -441,7 +421,6 @@ python-dotenv==1.0.1
 python-multipart==0.0.9
     # via fastapi
 pytz==2024.1
-    # via dateparser
     # via pandas
 pyyaml==6.0.1
     # via huggingface-hub
@@ -453,7 +432,6 @@ referencing==0.35.1
     # via jsonschema
     # via jsonschema-specifications
 regex==2024.5.15
-    # via dateparser
     # via tiktoken
 requests==2.32.2
     # via burr
@@ -534,8 +512,6 @@ tenacity==8.3.0
 tiktoken==0.6.0
     # via langchain-openai
     # via scrapegraphai
-tld==0.13
-    # via courlan
 tokenizers==0.19.1
     # via anthropic
 toml==0.10.2
@@ -555,8 +531,6 @@ tqdm==4.66.4
     # via openai
     # via scrapegraphai
     # via semchunk
-trafilatura==1.10.0
-    # via scrapegraphai
 typer==0.12.3
     # via fastapi-cli
 typing-extensions==4.12.0
@@ -586,8 +560,6 @@ typing-inspect==0.9.0
     # via sf-hamilton
 tzdata==2024.1
     # via pandas
-tzlocal==5.2
-    # via dateparser
 ujson==5.10.0
     # via fastapi
 undetected-playwright==0.3.0
@@ -596,10 +568,7 @@ uritemplate==4.1.1
     # via google-api-python-client
 urllib3==1.26.18
     # via botocore
-    # via courlan
-    # via htmldate
     # via requests
-    # via trafilatura
 uvicorn==0.29.0
     # via burr
     # via fastapi
diff --git a/requirements.lock b/requirements.lock
index 21b276eb..ad1c7ed7 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -28,8 +28,6 @@ async-timeout==4.0.3
     # via langchain
 attrs==23.2.0
     # via aiohttp
-babel==2.15.0
-    # via courlan
 beautifulsoup4==4.12.3
     # via google
     # via scrapegraphai
@@ -44,18 +42,11 @@ certifi==2024.2.2
     # via httpcore
     # via httpx
     # via requests
-    # via trafilatura
 charset-normalizer==3.3.2
-    # via htmldate
     # via requests
-    # via trafilatura
-courlan==1.2.0
-    # via trafilatura
 dataclasses-json==0.6.6
     # via langchain
     # via langchain-community
-dateparser==1.2.0
-    # via htmldate
 defusedxml==0.7.1
     # via langchain-anthropic
 distro==1.9.0
@@ -150,8 +141,6 @@ h11==0.14.0
     # via httpcore
 html2text==2024.2.26
     # via scrapegraphai
-htmldate==1.8.1
-    # via trafilatura
 httpcore==1.0.5
     # via httpx
 httplib2==0.22.0
@@ -181,8 +170,6 @@ jsonpatch==1.33
     # via langchain-core
 jsonpointer==2.4
     # via jsonpatch
-justext==3.0.1
-    # via trafilatura
 langchain==0.1.15
     # via scrapegraphai
 langchain-anthropic==0.1.11
@@ -220,12 +207,6 @@ langsmith==0.1.63
     # via langchain-core
 lxml==5.2.2
     # via free-proxy
-    # via htmldate
-    # via justext
-    # via lxml-html-clean
-    # via trafilatura
-lxml-html-clean==0.1.1
-    # via lxml
 marshmallow==3.21.2
     # via dataclasses-json
 minify-html==0.15.0
@@ -298,14 +279,11 @@ pyparsing==3.1.2
     # via httplib2
 python-dateutil==2.9.0.post0
     # via botocore
-    # via dateparser
     # via google-cloud-bigquery
-    # via htmldate
     # via pandas
 python-dotenv==1.0.1
     # via scrapegraphai
 pytz==2024.1
-    # via dateparser
     # via pandas
 pyyaml==6.0.1
     # via huggingface-hub
@@ -313,7 +291,6 @@ pyyaml==6.0.1
     # via langchain-community
     # via langchain-core
 regex==2024.5.15
-    # via dateparser
     # via tiktoken
 requests==2.32.2
     # via free-proxy
@@ -354,8 +331,6 @@ tenacity==8.3.0
 tiktoken==0.6.0
     # via langchain-openai
     # via scrapegraphai
-tld==0.13
-    # via courlan
 tokenizers==0.19.1
     # via anthropic
 tqdm==4.66.4
@@ -364,8 +339,6 @@ tqdm==4.66.4
     # via openai
     # via scrapegraphai
     # via semchunk
-trafilatura==1.10.0
-    # via scrapegraphai
 typing-extensions==4.12.0
     # via anthropic
     # via anyio
@@ -382,17 +355,12 @@ typing-inspect==0.9.0
     # via dataclasses-json
 tzdata==2024.1
     # via pandas
-tzlocal==5.2
-    # via dateparser
 undetected-playwright==0.3.0
     # via scrapegraphai
 uritemplate==4.1.1
     # via google-api-python-client
 urllib3==1.26.18
     # via botocore
-    # via courlan
-    # via htmldate
     # via requests
-    # via trafilatura
 yarl==1.9.4
     # via aiohttp
diff --git a/scrapegraphai/utils/convert_to_md.py b/scrapegraphai/utils/convert_to_md.py
index a2ec04db..35123042 100644
--- a/scrapegraphai/utils/convert_to_md.py
+++ b/scrapegraphai/utils/convert_to_md.py
@@ -2,8 +2,6 @@
 convert_to_md modul
 """
 import html2text
-from trafilatura import extract
-
 
 def convert_to_md(html):
     """ Convert HTML to Markdown.
@@ -20,6 +18,6 @@ def convert_to_md(html):
     'This is a paragraph.\n\n# This is a heading.'
 
     Note: All the styles and links are ignored during the conversion. """
-
-    return extract(filecontent=html,include_images=True,
-                       include_links=True, include_tables=True, output_format="markdown")
+    h = html2text.HTML2Text()
+    h.ignore_links = False
+    return h.handle(html)
diff --git a/tests/utils/convert_to_md_test.py b/tests/utils/convert_to_md_test.py
index 0b6d552e..72270913 100644
--- a/tests/utils/convert_to_md_test.py
+++ b/tests/utils/convert_to_md_test.py
@@ -7,7 +7,7 @@ def test_basic_html_to_md():
 
 def test_html_with_links_and_images():
     html = '<p>This is a <a href="https://example.com">link</a> and this is an <img src="https://example.com/image.jpg" alt="image"></p>'
-    assert convert_to_md(html) is  None
+    assert convert_to_md(html) is not None
 
 def test_html_with_tables():
     html = '''
@@ -17,11 +17,11 @@ def test_html_with_tables():
         <tr><td>Row 2, Cell 1</td><td>Row 2, Cell 2</td></tr>
     </table>
     '''
-    assert convert_to_md(html) is  None
+    assert convert_to_md(html) is not None
 
 def test_empty_html():
     html = ""
-    assert convert_to_md(html) is None
+    assert convert_to_md(html) is not None
 
 def test_complex_html_structure():
     html = '''

From 54a69de69e8077e02fd5584783ca62cc2e0ec5bb Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Wed, 17 Jul 2024 08:06:07 +0000
Subject: [PATCH 15/20] ci(release): 1.9.0-beta.6 [skip ci]

## [1.9.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.5...v1.9.0-beta.6) (2024-07-17)

### Features

* refactoring_to_md function ([602dd00](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/602dd00209ee1d72a1223fc4793759450921fcf9))
---
 CHANGELOG.md   | 7 +++++++
 pyproject.toml | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf1a1e7a..cbd47fe8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## [1.9.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.5...v1.9.0-beta.6) (2024-07-17)
+
+
+### Features
+
+* refactoring_to_md function ([602dd00](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/602dd00209ee1d72a1223fc4793759450921fcf9))
+
 ## [1.9.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.9.0-beta.4...v1.9.0-beta.5) (2024-07-15)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 59e9ef1a..369113c5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scrapegraphai"
 
 
-version = "1.9.0b5"
+version = "1.9.0b6"
 
 
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."

From e2ca41e928b9b8868fadc2bfd5bb02a9ff71f709 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Thu, 18 Jul 2024 10:24:24 +0200
Subject: [PATCH 16/20] add example

---
 examples/openai/search_link_graph_openai.py | 36 +++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 examples/openai/search_link_graph_openai.py

diff --git a/examples/openai/search_link_graph_openai.py b/examples/openai/search_link_graph_openai.py
new file mode 100644
index 00000000..10d10d4c
--- /dev/null
+++ b/examples/openai/search_link_graph_openai.py
@@ -0,0 +1,36 @@
+""" 
+Basic example of scraping pipeline using SearchLinkGraph
+"""
+from scrapegraphai.graphs import SearchLinkGraph
+from scrapegraphai.utils import prettify_exec_info
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+graph_config = {
+    "llm": {
+        "api_key": "s",
+        "model": "gpt-3.5-turbo",
+    },
+    "verbose": True,
+    "headless": False,
+}
+
+# ************************************************
+# Create the SearchLinkGraph instance and run it
+# ************************************************
+
+smart_scraper_graph = SearchLinkGraph(
+    source="https://sport.sky.it/nba?gr=www",
+    config=graph_config
+)
+
+result = smart_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = smart_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))

From 7314bc383068db590662bf7e512f799529308991 Mon Sep 17 00:00:00 2001
From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com>
Date: Thu, 18 Jul 2024 10:56:25 +0200
Subject: [PATCH 17/20] chore: upgrade tiktoken

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 369113c5..f77bb185 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,7 +24,7 @@ dependencies = [
     "beautifulsoup4==4.12.3",
     "pandas==2.2.2",
     "python-dotenv==1.0.1",
-    "tiktoken==0.6.0",
+    "tiktoken==0.7",
     "tqdm==4.66.4",
     "graphviz==0.20.3",
     "minify-html==0.15.0",

From dfa759e8340828cbed1f899b325fab507fe89a78 Mon Sep 17 00:00:00 2001
From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com>
Date: Thu, 18 Jul 2024 11:02:51 +0200
Subject: [PATCH 18/20] style(models): fix module docstrings

---
 scrapegraphai/models/bedrock.py | 2 +-
 scrapegraphai/models/ernie.py   | 2 +-
 scrapegraphai/models/oneapi.py  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/scrapegraphai/models/bedrock.py b/scrapegraphai/models/bedrock.py
index b7cbe288..06299075 100644
--- a/scrapegraphai/models/bedrock.py
+++ b/scrapegraphai/models/bedrock.py
@@ -1,5 +1,5 @@
 """ 
-bedrock configuration wrapper
+Bedrock Module
 """
 from langchain_aws import ChatBedrock
 
diff --git a/scrapegraphai/models/ernie.py b/scrapegraphai/models/ernie.py
index 0b4701e1..75e2a261 100644
--- a/scrapegraphai/models/ernie.py
+++ b/scrapegraphai/models/ernie.py
@@ -1,5 +1,5 @@
 """ 
-Ollama Module
+Ernie Module
 """
 from langchain_community.chat_models import ErnieBotChat
 
diff --git a/scrapegraphai/models/oneapi.py b/scrapegraphai/models/oneapi.py
index 00dddbf9..54e846d9 100644
--- a/scrapegraphai/models/oneapi.py
+++ b/scrapegraphai/models/oneapi.py
@@ -1,5 +1,5 @@
 """ 
-OpenAI Module
+OneAPI Module
 """
 from langchain_openai import ChatOpenAI
 

From c7b05a4993df14d6ed4848121a3cd209571232f7 Mon Sep 17 00:00:00 2001
From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com>
Date: Thu, 18 Jul 2024 11:23:10 +0200
Subject: [PATCH 19/20] chore(ci): upgrade lockfiles

---
 requirements-dev.lock | 3 ++-
 requirements.lock     | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/requirements-dev.lock b/requirements-dev.lock
index dcc4744e..300d05f5 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -180,6 +180,7 @@ graphviz==0.20.3
     # via scrapegraphai
 greenlet==3.0.3
     # via playwright
+    # via sqlalchemy
 groq==0.8.0
     # via langchain-groq
 grpc-google-iam-v1==0.13.1
@@ -509,7 +510,7 @@ tenacity==8.3.0
     # via langchain-community
     # via langchain-core
     # via streamlit
-tiktoken==0.6.0
+tiktoken==0.7.0
     # via langchain-openai
     # via scrapegraphai
 tokenizers==0.19.1
diff --git a/requirements.lock b/requirements.lock
index ad1c7ed7..2e9cf828 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -126,6 +126,7 @@ graphviz==0.20.3
     # via scrapegraphai
 greenlet==3.0.3
     # via playwright
+    # via sqlalchemy
 groq==0.8.0
     # via langchain-groq
 grpc-google-iam-v1==0.13.1
@@ -328,7 +329,7 @@ tenacity==8.3.0
     # via langchain
     # via langchain-community
     # via langchain-core
-tiktoken==0.6.0
+tiktoken==0.7.0
     # via langchain-openai
     # via scrapegraphai
 tokenizers==0.19.1

From a3d0aacff5c55c83dbc84723526856a7f38fbb42 Mon Sep 17 00:00:00 2001
From: Marco Vinciguerra <mvincig11@gmail.com>
Date: Thu, 18 Jul 2024 11:30:25 +0200
Subject: [PATCH 20/20] update env

---
 requirements-dev.lock | 1 -
 requirements.lock     | 1 -
 2 files changed, 2 deletions(-)

diff --git a/requirements-dev.lock b/requirements-dev.lock
index 300d05f5..b0bcaaa0 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -180,7 +180,6 @@ graphviz==0.20.3
     # via scrapegraphai
 greenlet==3.0.3
     # via playwright
-    # via sqlalchemy
 groq==0.8.0
     # via langchain-groq
 grpc-google-iam-v1==0.13.1
diff --git a/requirements.lock b/requirements.lock
index 2e9cf828..7a8bb455 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -126,7 +126,6 @@ graphviz==0.20.3
     # via scrapegraphai
 greenlet==3.0.3
     # via playwright
-    # via sqlalchemy
 groq==0.8.0
     # via langchain-groq
 grpc-google-iam-v1==0.13.1