From 69b8ed16b715c2866ba6cf26078a45b2d7e8c3f3 Mon Sep 17 00:00:00 2001 From: Nafay Rizwani <80161736+Nafay-0@users.noreply.github.com> Date: Tue, 13 Aug 2024 23:57:40 +0500 Subject: [PATCH 1/5] Fixed llm.rst for Azure OpenAI configuration --- docs/source/scrapers/llm.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/source/scrapers/llm.rst b/docs/source/scrapers/llm.rst index c22844d2..e76c56f1 100644 --- a/docs/source/scrapers/llm.rst +++ b/docs/source/scrapers/llm.rst @@ -132,10 +132,12 @@ We can also pass a model instance for the chat model and the embedding model. Fo azure_deployment="AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME", openai_api_version="AZURE_OPENAI_API_VERSION", ) - + # Supposing model_tokens are 100K + model_tokens_count = 100000 graph_config = { "llm": { - "model_instance": llm_model_instance + "model_instance": llm_model_instance, + "model_tokens": model_tokens_count, }, "embeddings": { "model_instance": embedder_model_instance @@ -191,4 +193,4 @@ We can also pass a model instance for the chat model and the embedding model. Fo "embeddings": { "model_instance": embedder_model_instance } - } \ No newline at end of file + } From 36d5b155acf9e8ed5ccaf7b11c9f2068e763b311 Mon Sep 17 00:00:00 2001 From: aziz-ullah-khan Date: Wed, 14 Aug 2024 17:39:53 +0500 Subject: [PATCH 2/5] fix azure-openai models_tokens issue --- scrapegraphai/helpers/models_tokens.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index e32838f1..e01f13de 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -19,7 +19,7 @@ models_tokens = { "gpt-4o-mini":128000, }, - "azure": { + "azure_openai": { "gpt-3.5-turbo-0125": 16385, "gpt-3.5": 4096, "gpt-3.5-turbo": 16385, From cb5ca90d73f46c91123ed1ad0171b77a575c9dae Mon Sep 17 00:00:00 2001 From: Andrew Masek Date: Mon, 19 Aug 2024 20:21:50 -0700 Subject: [PATCH 3/5] Fixed search prompt --- scrapegraphai/nodes/search_internet_node.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 61b11995..80c68139 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -82,6 +82,8 @@ class SearchInternetNode(BaseNode): For example, if the user prompt is "What is the capital of France?", you should return "capital of France". \n If you return something else, you will get a really bad grade. \n + What you return should be sufficient to get the answer from the internet. \n + I.E. don't just return a small part of the prompt. \n USER PROMPT: {user_prompt}""" search_prompt = PromptTemplate( From 31931aa300f068f3e018c521f8b8f15b5ebbdef4 Mon Sep 17 00:00:00 2001 From: Andrew Masek Date: Mon, 19 Aug 2024 20:39:49 -0700 Subject: [PATCH 4/5] Make language more general --- scrapegraphai/nodes/search_internet_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraphai/nodes/search_internet_node.py b/scrapegraphai/nodes/search_internet_node.py index 80c68139..87c8e9ab 100644 --- a/scrapegraphai/nodes/search_internet_node.py +++ b/scrapegraphai/nodes/search_internet_node.py @@ -83,7 +83,7 @@ class SearchInternetNode(BaseNode): you should return "capital of France". \n If you return something else, you will get a really bad grade. \n What you return should be sufficient to get the answer from the internet. \n - I.E. don't just return a small part of the prompt. \n + Don't just return a small part of the prompt, unless that is sufficient. \n USER PROMPT: {user_prompt}""" search_prompt = PromptTemplate( From 052f7d5e66436c97e17491c00b86c382642490b6 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 20 Aug 2024 09:53:35 +0200 Subject: [PATCH 5/5] fix(SearchNode): prompt Co-Authored-By: Andrew Masek <8219702+portoaj@users.noreply.github.com> --- scrapegraphai/prompts/search_internet_node_prompts.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scrapegraphai/prompts/search_internet_node_prompts.py b/scrapegraphai/prompts/search_internet_node_prompts.py index d786647b..f0508a53 100644 --- a/scrapegraphai/prompts/search_internet_node_prompts.py +++ b/scrapegraphai/prompts/search_internet_node_prompts.py @@ -11,4 +11,6 @@ You should return only the query string without any additional sentences. \n For example, if the user prompt is "What is the capital of France?", you should return "capital of France". \n If you return something else, you will get a really bad grade. \n +What you return should be sufficient to get the answer from the internet. \n +Don't just return a small part of the prompt, unless that is sufficient. \n USER PROMPT: {user_prompt}"""