From cec4c43c64f582c18d8d7e6038a511884780d68d Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Tue, 9 Apr 2024 10:23:38 +0200
Subject: [PATCH 1/3] add new local models context window

---
 scrapegraphai/graphs/abstract_graph.py      | 12 +++++++-----
 scrapegraphai/graphs/smart_scraper_graph.py |  1 -
 scrapegraphai/graphs/speech_graph.py        |  4 ++--
 scrapegraphai/helpers/models_tokens.py      |  6 ++++--
 scrapegraphai/nodes/fetch_node.py           |  4 ++--
 scrapegraphai/nodes/rag_node.py             | 12 +++++++-----
 scrapegraphai/nodes/search_internet_node.py |  2 +-
 7 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py
index 79eea199..4014aae4 100644
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@@ -6,6 +6,7 @@ from typing import Optional
 from ..models import OpenAI, Gemini, Ollama, AzureOpenAI
 from ..helpers import models_tokens
 
+
 class AbstractGraph(ABC):
     """
     Abstract class representing a generic graph-based tool.
@@ -19,7 +20,8 @@ class AbstractGraph(ABC):
         self.source = source
         self.config = config
         self.llm_model = self._create_llm(config["llm"])
-        self.embedder_model = None if "embeddings" not in config else self._create_llm(config["embeddings"])
+        self.embedder_model = None if "embeddings" not in config else self._create_llm(
+            config["embeddings"])
         self.graph = self._create_graph()
 
     def _create_llm(self, llm_config: dict):
@@ -39,7 +41,7 @@ class AbstractGraph(ABC):
             except KeyError:
                 raise ValueError("Model not supported")
             return OpenAI(llm_params)
-        
+
         elif "azure" in llm_params["model"]:
             # take the model after the last dash
             llm_params["model"] = llm_params["model"].split("/")[-1]
@@ -48,14 +50,14 @@ class AbstractGraph(ABC):
             except KeyError:
                 raise ValueError("Model not supported")
             return AzureOpenAI(llm_params)
-        
+
         elif "gemini" in llm_params["model"]:
             try:
                 self.model_token = models_tokens["gemini"][llm_params["model"]]
             except KeyError:
                 raise ValueError("Model not supported")
             return Gemini(llm_params)
-        
+
         elif "ollama" in llm_params["model"]:
             # take the model after the last dash
             llm_params["model"] = llm_params["model"].split("/")[-1]
@@ -64,7 +66,7 @@ class AbstractGraph(ABC):
             except KeyError:
                 raise ValueError("Model not supported")
             return Ollama(llm_params)
-        
+
         else:
             raise ValueError("Model not supported")
 
diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py
index 06f7d010..e4c32100 100644
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@@ -1,7 +1,6 @@
 """ 
 Module for creating the smart scraper
 """
-from ..models import OpenAI, Gemini
 from .base_graph import BaseGraph
 from ..nodes import (
     FetchNode,
diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py
index 28d0cdfa..a34fa2f1 100644
--- a/scrapegraphai/graphs/speech_graph.py
+++ b/scrapegraphai/graphs/speech_graph.py
@@ -2,7 +2,7 @@
 Module for converting text to speach
 """
 from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
-from ..models import OpenAI, Gemini, OpenAITextToSpeech
+from ..models import OpenAITextToSpeech
 from .base_graph import BaseGraph
 from ..nodes import (
     FetchNode,
@@ -27,7 +27,7 @@ class SpeechGraph(AbstractGraph):
         super().__init__(prompt, config, source)
 
         self.input_key = "url" if source.startswith("http") else "local_dir"
-        
+
     def _create_graph(self):
         """
         Creates the graph of nodes representing the workflow for web scraping and summarization.
diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py
index dc286526..5f9aa743 100644
--- a/scrapegraphai/helpers/models_tokens.py
+++ b/scrapegraphai/helpers/models_tokens.py
@@ -21,9 +21,11 @@ models_tokens = {
         "gemini-pro": 128000,
     },
 
-    "ollama":{
+    "ollama": {
         "llama2": 4096,
         "mistral": 8192,
+        "codellama": 16000,
+        "dolphin-mixtral": 32000,
+        "mistral-openorca": 32000,
     }
-    
 }
diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py
index 75f53655..12f69240 100644
--- a/scrapegraphai/nodes/fetch_node.py
+++ b/scrapegraphai/nodes/fetch_node.py
@@ -67,8 +67,8 @@ class FetchNode(BaseNode):
         # Fetching data from the state based on the input keys
         input_data = [state[key] for key in input_keys]
 
-        source = input_data[0]        
-        
+        source = input_data[0]
+
         # if it is a local directory
         if not source.startswith("http"):
             document = [Document(page_content=source, metadata={
diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py
index 4ddabe66..adda5c33 100644
--- a/scrapegraphai/nodes/rag_node.py
+++ b/scrapegraphai/nodes/rag_node.py
@@ -8,8 +8,8 @@ from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
 from langchain_community.document_transformers import EmbeddingsRedundantFilter
 from langchain_community.vectorstores import FAISS
-from ..models import OpenAI, Gemini, Ollama, AzureOpenAI
 from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
+from ..models import OpenAI, Ollama, AzureOpenAI
 from langchain_community.embeddings import OllamaEmbeddings
 from .base_node import BaseNode
 
@@ -86,16 +86,18 @@ class RAGNode(BaseNode):
         embedding_model = self.embedder_model if self.embedder_model else self.llm_model
 
         if isinstance(embedding_model, OpenAI):
-            embeddings = OpenAIEmbeddings(api_key=embedding_model.openai_api_key)
+            embeddings = OpenAIEmbeddings(
+                api_key=embedding_model.openai_api_key)
         elif isinstance(embedding_model, AzureOpenAI):
             embeddings = AzureOpenAIEmbeddings()
         elif isinstance(embedding_model, Ollama):
             embeddings = OllamaEmbeddings()
         else:
             raise ValueError("Embedding Model missing or not supported")
-        
-        retriever = FAISS.from_documents(chunked_docs, embeddings).as_retriever()
-    
+
+        retriever = FAISS.from_documents(
+            chunked_docs, embeddings).as_retriever()
+
         redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
         # similarity_threshold could be set, now k=20
         relevant_filter = EmbeddingsFilter(embeddings=embeddings)
diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py
index 6220441d..8d5813b2 100644
--- a/scrapegraphai/nodes/search_internet_node.py
+++ b/scrapegraphai/nodes/search_internet_node.py
@@ -94,7 +94,7 @@ class SearchInternetNode(BaseNode):
         # Execute the chain to get the search query
         search_answer = search_prompt | self.llm_model | output_parser
         search_query = search_answer.invoke({"user_prompt": user_prompt})[0]
-    
+
         print(f"Search Query: {search_query}")
         # TODO: handle multiple URLs
         answer = search_on_web(query=search_query, max_results=1)[0]

From 992e7f862fee04271b0ab5db6a9198f28d76a4fd Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Tue, 9 Apr 2024 10:33:45 +0200
Subject: [PATCH 2/3] add available models

---
 scrapegraphai/graphs/abstract_graph.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py
index 4014aae4..f08b0f33 100644
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@@ -59,7 +59,14 @@ class AbstractGraph(ABC):
             return Gemini(llm_params)
 
         elif "ollama" in llm_params["model"]:
-            # take the model after the last dash
+            """ 
+            Available models:
+            - llama2
+            - mistral
+            - codellama
+            - dolphin-mixtral
+            - mistral-openorca
+            """
             llm_params["model"] = llm_params["model"].split("/")[-1]
             try:
                 self.model_token = models_tokens["ollama"][llm_params["model"]]

From a25e7ea32bd5daec15d29956a000b7faac1c042a Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Tue, 9 Apr 2024 11:45:20 +0200
Subject: [PATCH 3/3] add new version

---
 poetry.lock    | 8 ++++----
 pyproject.toml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index f8db368b..e47aec4d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -1117,13 +1117,13 @@ extended-testing = ["lxml (>=5.1.0,<6.0.0)"]
 
 [[package]]
 name = "langsmith"
-version = "0.1.40"
+version = "0.1.41"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langsmith-0.1.40-py3-none-any.whl", hash = "sha256:aa47d0f5a1eabd5c05ac6ce2cd3e28ccfc554d366e856a27b7c3c17c443881cb"},
-    {file = "langsmith-0.1.40.tar.gz", hash = "sha256:50fdf313741cf94e978de06025fd180b56acf1d1a4549b0fd5453ef23d5461ef"},
+    {file = "langsmith-0.1.41-py3-none-any.whl", hash = "sha256:11de22b6990502c630fdfdf6906681e664c6659d2118bcd2b79d08016e770831"},
+    {file = "langsmith-0.1.41.tar.gz", hash = "sha256:1250cd6c9074ca10d40002b23d79b3017329b139fbe954248fdd7a79544e90d0"},
 ]
 
 [package.dependencies]
diff --git a/pyproject.toml b/pyproject.toml
index 22ffb592..1d7dfe1e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scrapegraphai"
-version = "0.1.0"
+version = "0.1.1"
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
 authors = [
     "Marco Vinciguerra <mvincig11@gmail.com>",