diff --git a/poetry.lock b/poetry.lock index f8db368b..e47aec4d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -1117,13 +1117,13 @@ extended-testing = ["lxml (>=5.1.0,<6.0.0)"] [[package]] name = "langsmith" -version = "0.1.40" +version = "0.1.41" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.40-py3-none-any.whl", hash = "sha256:aa47d0f5a1eabd5c05ac6ce2cd3e28ccfc554d366e856a27b7c3c17c443881cb"}, - {file = "langsmith-0.1.40.tar.gz", hash = "sha256:50fdf313741cf94e978de06025fd180b56acf1d1a4549b0fd5453ef23d5461ef"}, + {file = "langsmith-0.1.41-py3-none-any.whl", hash = "sha256:11de22b6990502c630fdfdf6906681e664c6659d2118bcd2b79d08016e770831"}, + {file = "langsmith-0.1.41.tar.gz", hash = "sha256:1250cd6c9074ca10d40002b23d79b3017329b139fbe954248fdd7a79544e90d0"}, ] [package.dependencies] diff --git a/pyproject.toml b/pyproject.toml index 22ffb592..1d7dfe1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scrapegraphai" -version = "0.1.0" +version = "0.1.1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." 
authors = [ "Marco Vinciguerra ", diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 4014aae4..f08b0f33 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -59,7 +59,14 @@ class AbstractGraph(ABC): return Gemini(llm_params) elif "ollama" in llm_params["model"]: - # take the model after the last dash + """ + Available models: + - llama2 + - mistral + - codellama + - dolphin-mixtral + - mistral-openorca + """ llm_params["model"] = llm_params["model"].split("/")[-1] try: self.model_token = models_tokens["ollama"][llm_params["model"]] diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index 131ee2c1..a34fa2f1 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ b/scrapegraphai/graphs/speech_graph.py @@ -2,7 +2,7 @@ Module for converting text to speach """ from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes -from ..models import OpenAI, Gemini, OpenAITextToSpeech +from ..models import OpenAITextToSpeech from .base_graph import BaseGraph from ..nodes import ( FetchNode, diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 63723ba3..5f9aa743 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -24,6 +24,8 @@ models_tokens = { "ollama": { "llama2": 4096, "mistral": 8192, + "codellama": 16000, + "dolphin-mixtral": 32000, + "mistral-openorca": 32000, } - } diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 75f53655..12f69240 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -67,8 +67,8 @@ class FetchNode(BaseNode): # Fetching data from the state based on the input keys input_data = [state[key] for key in input_keys] - source = input_data[0] - + source = input_data[0] + # if it is a local directory if not source.startswith("http"): document = 
[Document(page_content=source, metadata={ diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py index df7d3d28..adda5c33 100644 --- a/scrapegraphai/nodes/rag_node.py +++ b/scrapegraphai/nodes/rag_node.py @@ -8,8 +8,8 @@ from langchain.retrievers import ContextualCompressionRetriever from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline from langchain_community.document_transformers import EmbeddingsRedundantFilter from langchain_community.vectorstores import FAISS -from ..models import OpenAI, Gemini, Ollama, AzureOpenAI from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings +from ..models import OpenAI, Ollama, AzureOpenAI from langchain_community.embeddings import OllamaEmbeddings from .base_node import BaseNode