diff --git a/examples/deepseek/.env.example b/examples/deepseek/.env.example index 12c1491c..37511138 100644 --- a/examples/deepseek/.env.example +++ b/examples/deepseek/.env.example @@ -1 +1 @@ -OPENAI_APIKEY="your openai api key" \ No newline at end of file +DEEPSEEK_APIKEY="your api key" \ No newline at end of file diff --git a/examples/deepseek/csv_scraper_deepseek.py b/examples/deepseek/csv_scraper_deepseek.py index b734b543..fd55469d 100644 --- a/examples/deepseek/csv_scraper_deepseek.py +++ b/examples/deepseek/csv_scraper_deepseek.py @@ -30,6 +30,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/csv_scraper_graph_multi_deepseek.py b/examples/deepseek/csv_scraper_graph_multi_deepseek.py index ea5e9154..d665bc31 100644 --- a/examples/deepseek/csv_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/csv_scraper_graph_multi_deepseek.py @@ -30,6 +30,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/custom_graph_deepseek.py b/examples/deepseek/custom_graph_deepseek.py index f73639b0..a265db95 100644 --- a/examples/deepseek/custom_graph_deepseek.py +++ b/examples/deepseek/custom_graph_deepseek.py @@ -20,6 +20,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/json_scraper_deepseek.py b/examples/deepseek/json_scraper_deepseek.py index dfe6f489..696a08d9 100644 --- a/examples/deepseek/json_scraper_deepseek.py +++ b/examples/deepseek/json_scraper_deepseek.py @@ -29,6 +29,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/json_scraper_multi_deepseek.py b/examples/deepseek/json_scraper_multi_deepseek.py index b957dde0..17660ddb 100644 --- a/examples/deepseek/json_scraper_multi_deepseek.py +++ b/examples/deepseek/json_scraper_multi_deepseek.py @@ -15,6 +15,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/pdf_scraper_graph_deepseek.py b/examples/deepseek/pdf_scraper_graph_deepseek.py index 3a0f8391..3bd100d5 100644 --- a/examples/deepseek/pdf_scraper_graph_deepseek.py +++ b/examples/deepseek/pdf_scraper_graph_deepseek.py @@ -20,6 +20,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/pdf_scraper_multi_deepseek.py b/examples/deepseek/pdf_scraper_multi_deepseek.py index 211e4635..c884b798 100644 --- a/examples/deepseek/pdf_scraper_multi_deepseek.py +++ b/examples/deepseek/pdf_scraper_multi_deepseek.py @@ -15,6 +15,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/scrape_plain_text_deepseek.py b/examples/deepseek/scrape_plain_text_deepseek.py index d7a070d7..7076dd39 100644 --- a/examples/deepseek/scrape_plain_text_deepseek.py +++ b/examples/deepseek/scrape_plain_text_deepseek.py @@ -31,6 +31,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/script_generator_deepseek.py b/examples/deepseek/script_generator_deepseek.py index fd5fd4dd..09db0876 100644 --- a/examples/deepseek/script_generator_deepseek.py +++ b/examples/deepseek/script_generator_deepseek.py @@ -20,6 +20,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/deepseek/search_graph_deepseek.py b/examples/deepseek/search_graph_deepseek.py index 74944370..1ef42602 100644 --- a/examples/deepseek/search_graph_deepseek.py +++ b/examples/deepseek/search_graph_deepseek.py @@ -19,6 +19,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "max_results": 2, "verbose": True, diff --git a/examples/deepseek/smart_scraper_deepseek.py b/examples/deepseek/smart_scraper_deepseek.py index ed291b02..9fe00a2a 100644 --- a/examples/deepseek/smart_scraper_deepseek.py +++ b/examples/deepseek/smart_scraper_deepseek.py @@ -21,6 +21,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/smart_scraper_schema_deepseek.py b/examples/deepseek/smart_scraper_schema_deepseek.py index c83c6e9d..8d0cf376 100644 --- a/examples/deepseek/smart_scraper_schema_deepseek.py +++ b/examples/deepseek/smart_scraper_schema_deepseek.py @@ -41,6 +41,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/xml_scraper_deepseek.py b/examples/deepseek/xml_scraper_deepseek.py index ba401b91..3b2af61b 100644 --- a/examples/deepseek/xml_scraper_deepseek.py +++ b/examples/deepseek/xml_scraper_deepseek.py @@ -31,6 +31,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/deepseek/xml_scraper_graph_multi_deepseek.py b/examples/deepseek/xml_scraper_graph_multi_deepseek.py index 0f53a6b2..5d3c29d5 100644 --- a/examples/deepseek/xml_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/xml_scraper_graph_multi_deepseek.py @@ -30,6 +30,11 @@ graph_config = { "model": "deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/groq/csv_scraper_graph_multi_groq.py b/examples/groq/csv_scraper_graph_multi_groq.py index 475b8cac..87e3279c 100644 --- a/examples/groq/csv_scraper_graph_multi_groq.py +++ b/examples/groq/csv_scraper_graph_multi_groq.py @@ -30,6 +30,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/csv_scraper_groq.py b/examples/groq/csv_scraper_groq.py index 805ce5fc..20839a75 100644 --- a/examples/groq/csv_scraper_groq.py +++ b/examples/groq/csv_scraper_groq.py @@ -31,6 +31,11 @@ graph_config = { "api_key": groq_key, "temperature": 0 }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily + }, } # ************************************************ # Create the CSVScraperGraph instance and run it diff --git a/examples/groq/custom_graph_groq.py b/examples/groq/custom_graph_groq.py index 7b35d7a7..d0384ffd 100644 --- a/examples/groq/custom_graph_groq.py +++ b/examples/groq/custom_graph_groq.py @@ -19,6 +19,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/examples/groq/json_scraper_groq.py b/examples/groq/json_scraper_groq.py index a9099069..3faddae8 100644 --- a/examples/groq/json_scraper_groq.py +++ b/examples/groq/json_scraper_groq.py @@ -30,6 +30,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/examples/groq/json_scraper_multi_groq.py b/examples/groq/json_scraper_multi_groq.py index df3b9276..13b49be6 100644 --- a/examples/groq/json_scraper_multi_groq.py +++ b/examples/groq/json_scraper_multi_groq.py @@ -15,6 +15,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/groq/pdf_scraper_graph_groq.py b/examples/groq/pdf_scraper_graph_groq.py index 27f51e58..b04283b8 100644 --- a/examples/groq/pdf_scraper_graph_groq.py +++ b/examples/groq/pdf_scraper_graph_groq.py @@ -18,6 +18,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, } diff --git a/examples/groq/pdf_scraper_multi_groq.py b/examples/groq/pdf_scraper_multi_groq.py index c43a7087..f1afc058 100644 --- a/examples/groq/pdf_scraper_multi_groq.py +++ b/examples/groq/pdf_scraper_multi_groq.py @@ -14,6 +14,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/groq/scrape_plain_text_groq.py b/examples/groq/scrape_plain_text_groq.py index 329df51f..73cda250 100644 --- a/examples/groq/scrape_plain_text_groq.py +++ b/examples/groq/scrape_plain_text_groq.py @@ -32,6 +32,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/examples/groq/script_generator_groq.py b/examples/groq/script_generator_groq.py index 9e280e2b..a370eb3c 100644 --- a/examples/groq/script_generator_groq.py +++ b/examples/groq/script_generator_groq.py @@ -19,6 +19,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "library": "beautifulsoup" } diff --git a/examples/groq/search_graph_groq.py b/examples/groq/search_graph_groq.py index e3044c0e..e82ffb7c 100644 --- a/examples/groq/search_graph_groq.py +++ b/examples/groq/search_graph_groq.py @@ -21,6 +21,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/smart_scraper_groq.py b/examples/groq/smart_scraper_groq.py index f32f3493..c1a5d319 100644 --- a/examples/groq/smart_scraper_groq.py +++ b/examples/groq/smart_scraper_groq.py @@ -22,10 +22,10 @@ graph_config = { "api_key": groq_key, "temperature": 0 }, - "embeddings": { + "embeddings": { "model": "ollama/nomic-embed-text", "temperature": 0, - "base_url": "http://localhost:11434", # set ollama URL arbitrarily + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/smart_scraper_multi_groq.py b/examples/groq/smart_scraper_multi_groq.py index 6ead098c..18ba3992 100644 --- a/examples/groq/smart_scraper_multi_groq.py +++ b/examples/groq/smart_scraper_multi_groq.py @@ -19,6 +19,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/examples/groq/smart_scraper_schema_groq.py b/examples/groq/smart_scraper_schema_groq.py index 3c23589a..2b80c658 100644 --- a/examples/groq/smart_scraper_schema_groq.py +++ b/examples/groq/smart_scraper_schema_groq.py @@ -41,6 +41,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/xml_scraper_graph_multi_groq.py b/examples/groq/xml_scraper_graph_multi_groq.py index 62540671..7b102c0f 100644 --- a/examples/groq/xml_scraper_graph_multi_groq.py +++ b/examples/groq/xml_scraper_graph_multi_groq.py @@ -30,6 +30,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "headless": False } diff --git a/examples/groq/xml_scraper_groq.py b/examples/groq/xml_scraper_groq.py index 2172ea77..1c086175 100644 --- a/examples/groq/xml_scraper_groq.py +++ b/examples/groq/xml_scraper_groq.py @@ -30,6 +30,11 @@ graph_config = { "model": "groq/gemma-7b-it", "api_key": groq_key, "temperature": 0 + }, + "embeddings": { + "model": "ollama/nomic-embed-text", + "temperature": 0, + # "base_url": "http://localhost:11434", # set ollama URL arbitrarily }, "verbose": True, "headless": False diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 81ed0590..00efcdf8 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -69,8 +69,7 @@ class AbstractGraph(ABC): self.config = config self.schema = schema self.llm_model = self._create_llm(config["llm"], chat=True) - self.embedder_model = self._create_default_embedder(llm_config=config["llm"] - ) if "embeddings" not in config else self._create_embedder( + self.embedder_model = self._create_default_embedder(llm_config=config["llm"] ) if "embeddings" not in config else self._create_embedder( config["embeddings"]) self.verbose = False if config is None else config.get( "verbose", False) @@ -102,6 +101,7 @@ class AbstractGraph(ABC): "llm_model": self.llm_model, "embedder_model": self.embedder_model } + self.set_common_params(common_params, overwrite=False) # set burr config @@ -124,28 +124,7 @@ class AbstractGraph(ABC): for node in self.graph.nodes: node.update_config(params, overwrite) - - def _set_model_token(self, llm): - - if "Azure" in str(type(llm)): - try: - self.model_token = models_tokens["azure"][llm.model_name] - except KeyError: - raise KeyError("Model not supported") - - elif "HuggingFaceEndpoint" in str(type(llm)): - if "mistral" in llm.repo_id: - try: - self.model_token = models_tokens["mistral"][llm.repo_id] - except KeyError: - raise KeyError("Model not supported") - elif "Google" in str(type(llm)): - try: - if "gemini" in llm.model: - self.model_token = models_tokens["gemini"][llm.model] - except KeyError: - raise KeyError("Model not supported") - + def _create_llm(self, llm_config: dict, chat=False) -> object: """ Create a large language model instance based on the configuration provided. @@ -165,8 +144,6 @@ class AbstractGraph(ABC): # If model instance is passed directly instead of the model details if "model_instance" in llm_params: - if chat: - self._set_model_token(llm_params["model_instance"]) return llm_params["model_instance"] # Instantiate the language model based on the model name