Merge branch 'main' into refactoring_examples_folder

2026-06-25 21:11:11 +08:00 · 2024-04-09 11:46:34 +02:00 · 2024-04-09 11:46:34 +02:00 · b4e640879d
commit b4e640879d
parent ee533b8d74 a25e7ea32b
7 changed files with 20 additions and 11 deletions
--- a/poetry.lock
+++ b/poetry.lock
@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@ -1117,13 +1117,13 @@ extended-testing = ["lxml (>=5.1.0,<6.0.0)"]

 [[package]]
 name = "langsmith"
-version = "0.1.40"
+version = "0.1.41"
 description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
 optional = false
 python-versions = "<4.0,>=3.8.1"
 files = [
-    {file = "langsmith-0.1.40-py3-none-any.whl", hash = "sha256:aa47d0f5a1eabd5c05ac6ce2cd3e28ccfc554d366e856a27b7c3c17c443881cb"},
-    {file = "langsmith-0.1.40.tar.gz", hash = "sha256:50fdf313741cf94e978de06025fd180b56acf1d1a4549b0fd5453ef23d5461ef"},
+    {file = "langsmith-0.1.41-py3-none-any.whl", hash = "sha256:11de22b6990502c630fdfdf6906681e664c6659d2118bcd2b79d08016e770831"},
+    {file = "langsmith-0.1.41.tar.gz", hash = "sha256:1250cd6c9074ca10d40002b23d79b3017329b139fbe954248fdd7a79544e90d0"},
 ]

 [package.dependencies]
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scrapegraphai"
-version = "0.1.0"
+version = "0.1.1"
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
 authors = [
    "Marco Vinciguerra <mvincig11@gmail.com>",
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@ -59,7 +59,14 @@ class AbstractGraph(ABC):
            return Gemini(llm_params)

        elif "ollama" in llm_params["model"]:
-            # take the model after the last dash
+            """ 
+            Available models:
+            - llama2
+            - mistral
+            - codellama
+            - dolphin-mixtral
+            - mistral-openorca
+            """
            llm_params["model"] = llm_params["model"].split("/")[-1]
            try:
                self.model_token = models_tokens["ollama"][llm_params["model"]]
--- a/scrapegraphai/graphs/speech_graph.py
+++ b/scrapegraphai/graphs/speech_graph.py
@ -2,7 +2,7 @@
 Module for converting text to speach
 """
 from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
-from ..models import OpenAI, Gemini, OpenAITextToSpeech
+from ..models import OpenAITextToSpeech
 from .base_graph import BaseGraph
 from ..nodes import (
    FetchNode,
--- a/scrapegraphai/helpers/models_tokens.py
+++ b/scrapegraphai/helpers/models_tokens.py
@ -24,6 +24,8 @@ models_tokens = {
    "ollama": {
        "llama2": 4096,
        "mistral": 8192,
+        "codellama": 16000,
+        "dolphin-mixtral": 32000,
+        "mistral-openorca": 32000,
    }
-
 }
--- a/scrapegraphai/nodes/fetch_node.py
+++ b/scrapegraphai/nodes/fetch_node.py
@ -67,8 +67,8 @@ class FetchNode(BaseNode):
        # Fetching data from the state based on the input keys
        input_data = [state[key] for key in input_keys]

-        source = input_data[0]        
-        
+        source = input_data[0]
+
        # if it is a local directory
        if not source.startswith("http"):
            document = [Document(page_content=source, metadata={
--- a/scrapegraphai/nodes/rag_node.py
+++ b/scrapegraphai/nodes/rag_node.py
@ -8,8 +8,8 @@ from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
 from langchain_community.document_transformers import EmbeddingsRedundantFilter
 from langchain_community.vectorstores import FAISS
-from ..models import OpenAI, Gemini, Ollama, AzureOpenAI
 from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
+from ..models import OpenAI, Ollama, AzureOpenAI
 from langchain_community.embeddings import OllamaEmbeddings
 from .base_node import BaseNode