From faef3186f795e950ade14bc8b6d8d1cea3afd327 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Fri, 16 Aug 2024 17:38:55 +0200 Subject: [PATCH] fix: model count --- examples/local_models/smart_scraper_ollama.py | 2 +- scrapegraphai/graphs/abstract_graph.py | 2 +- scrapegraphai/helpers/models_tokens.py | 1 + scrapegraphai/nodes/parse_node.py | 1 + 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/local_models/smart_scraper_ollama.py b/examples/local_models/smart_scraper_ollama.py index d5585ff7..3f6c0967 100644 --- a/examples/local_models/smart_scraper_ollama.py +++ b/examples/local_models/smart_scraper_ollama.py @@ -9,7 +9,7 @@ from scrapegraphai.utils import prettify_exec_info graph_config = { "llm": { - "model": "ollama/llama3.1", + "model": "ollama/mistral", "temperature": 0, "format": "json", # Ollama needs the format to be specified explicitly # "base_url": "http://localhost:11434", # set ollama URL arbitrarily diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 459d38fd..9cb39a0f 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -149,7 +149,7 @@ class AbstractGraph(ABC): known_models = ["openai", "azure_openai", "google_genai", "ollama", "oneapi", "nvidia", "groq", "google_vertexai", "bedrock", "mistralai", "hugging_face", "deepseek", "ernie", "fireworks"] - if llm_params["model"] not in known_models: + if llm_params["model"].split("/")[0] not in known_models: raise ValueError(f"Model '{llm_params['model']}' is not supported") try: diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 7e91c8ea..791bcf72 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -62,6 +62,7 @@ models_tokens = { "scrapegraph": 8192, "llava": 4096, "mixtral:8x22b-instruct": 65536, + "mistral":8192, "mistral-openorca": 32000, "nomic-embed-text": 8192, "nous-hermes2:34b": 4096, diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py index 1a5c1119..db7f8518 100644 --- a/scrapegraphai/nodes/parse_node.py +++ b/scrapegraphai/nodes/parse_node.py @@ -80,6 +80,7 @@ class ParseNode(BaseNode): docs_transformed = docs_transformed[0] if isinstance(docs_transformed, Document): + chunks = chunk(text=docs_transformed.page_content, chunk_size=self.node_config.get("chunk_size", 4096)-250, token_counter=lambda text: len(text.split()),