From 898e5a7af504fbf4c1cabb14103e66184037de49 Mon Sep 17 00:00:00 2001 From: Matteo Vedovati Date: Wed, 7 Aug 2024 21:42:54 +0200 Subject: [PATCH] fix: refactoring of merge_answer_node --- scrapegraphai/helpers/__init__.py | 1 + scrapegraphai/helpers/merge_answer_node_prompts.py | 13 +++++++++++++ scrapegraphai/nodes/merge_answers_node.py | 13 ++----------- 3 files changed, 16 insertions(+), 11 deletions(-) create mode 100644 scrapegraphai/helpers/merge_answer_node_prompts.py diff --git a/scrapegraphai/helpers/__init__.py b/scrapegraphai/helpers/__init__.py index d238f76e..4174424a 100644 --- a/scrapegraphai/helpers/__init__.py +++ b/scrapegraphai/helpers/__init__.py @@ -10,3 +10,4 @@ from .generate_answer_node_prompts import template_chunks, template_no_chunks, t from .generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv from .generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf from .generate_answer_node_omni_prompts import template_chunks_omni, template_no_chunk_omni, template_merge_omni +from .merge_answer_node_prompts import template_combined diff --git a/scrapegraphai/helpers/merge_answer_node_prompts.py b/scrapegraphai/helpers/merge_answer_node_prompts.py new file mode 100644 index 00000000..b6dad71b --- /dev/null +++ b/scrapegraphai/helpers/merge_answer_node_prompts.py @@ -0,0 +1,13 @@ +""" +Merge answer node prompts +""" + +template_combined = """ + You are a website scraper and you have just scraped some content from multiple websites.\n + You are now asked to provide an answer to a USER PROMPT based on the content you have scraped.\n + You need to merge the content from the different websites into a single answer without repetitions (if there are any). \n + The scraped contents are in a JSON format and you need to merge them based on the context and providing a correct JSON structure.\n + OUTPUT INSTRUCTIONS: {format_instructions}\n + USER PROMPT: {user_prompt}\n + WEBSITE CONTENT: {website_content} + """ \ No newline at end of file diff --git a/scrapegraphai/nodes/merge_answers_node.py b/scrapegraphai/nodes/merge_answers_node.py index 548b7c04..eaea0184 100644 --- a/scrapegraphai/nodes/merge_answers_node.py +++ b/scrapegraphai/nodes/merge_answers_node.py @@ -7,6 +7,7 @@ from langchain.prompts import PromptTemplate from langchain_core.output_parsers import JsonOutputParser from ..utils.logging import get_logger from .base_node import BaseNode +from ..helpers import template_combined class MergeAnswersNode(BaseNode): @@ -79,18 +80,8 @@ class MergeAnswersNode(BaseNode): format_instructions = output_parser.get_format_instructions() - template_merge = """ - You are a website scraper and you have just scraped some content from multiple websites.\n - You are now asked to provide an answer to a USER PROMPT based on the content you have scraped.\n - You need to merge the content from the different websites into a single answer without repetitions (if there are any). \n - The scraped contents are in a JSON format and you need to merge them based on the context and providing a correct JSON structure.\n - OUTPUT INSTRUCTIONS: {format_instructions}\n - USER PROMPT: {user_prompt}\n - WEBSITE CONTENT: {website_content} - """ - prompt_template = PromptTemplate( - template=template_merge, + template=template_combined, input_variables=["user_prompt"], partial_variables={ "format_instructions": format_instructions,