From a7249685cb2b133beeea439d1337cb1adeb64acd Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Sun, 14 Jul 2024 10:24:38 +0200 Subject: [PATCH] removed rag node --- scrapegraphai/graphs/csv_scraper_graph.py | 14 ++------------ scrapegraphai/graphs/json_scraper_graph.py | 14 ++------------ scrapegraphai/graphs/markdown_scraper_graph.py | 14 ++------------ scrapegraphai/graphs/omni_scraper_graph.py | 16 +++------------- scrapegraphai/graphs/pdf_scraper_graph.py | 13 +------------ scrapegraphai/graphs/smart_scraper_graph.py | 13 ++----------- scrapegraphai/graphs/xml_scraper_graph.py | 14 ++------------ scrapegraphai/nodes/generate_answer_csv_node.py | 4 ++-- scrapegraphai/nodes/generate_answer_node.py | 4 ++-- scrapegraphai/nodes/generate_answer_omni_node.py | 4 ++-- scrapegraphai/nodes/generate_answer_pdf_node.py | 2 +- 11 files changed, 21 insertions(+), 91 deletions(-) diff --git a/scrapegraphai/graphs/csv_scraper_graph.py b/scrapegraphai/graphs/csv_scraper_graph.py index ea205bb3..f4efd1fb 100644 --- a/scrapegraphai/graphs/csv_scraper_graph.py +++ b/scrapegraphai/graphs/csv_scraper_graph.py @@ -10,7 +10,6 @@ from .abstract_graph import AbstractGraph from ..nodes import ( FetchNode, - RAGNode, GenerateAnswerCSVNode ) @@ -37,14 +36,7 @@ class CSVScraperGraph(AbstractGraph): input="csv | csv_dir", output=["doc"], ) - rag_node = RAGNode( - input="user_prompt & doc", - output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model, - } - ) + generate_answer_node = GenerateAnswerCSVNode( input="user_prompt & (relevant_chunks | doc)", output=["answer"], @@ -58,12 +50,10 @@ class CSVScraperGraph(AbstractGraph): return BaseGraph( nodes=[ fetch_node, - rag_node, generate_answer_node, ], edges=[ - (fetch_node, rag_node), - (rag_node, generate_answer_node) + (fetch_node, generate_answer_node) ], entry_point=fetch_node, graph_name=self.__class__.__name__ diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index b85a34dc..fe54ebec 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -10,7 +10,6 @@ from .abstract_graph import AbstractGraph from ..nodes import ( FetchNode, - RAGNode, GenerateAnswerNode ) @@ -62,14 +61,7 @@ class JSONScraperGraph(AbstractGraph): input="json | json_dir", output=["doc", "link_urls", "img_urls"], ) - rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model - } - ) + generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -83,12 +75,10 @@ class JSONScraperGraph(AbstractGraph): return BaseGraph( nodes=[ fetch_node, - rag_node, generate_answer_node, ], edges=[ - (fetch_node, rag_node), - (rag_node, generate_answer_node) + (fetch_node, generate_answer_node) ], entry_point=fetch_node, graph_name=self.__class__.__name__ diff --git a/scrapegraphai/graphs/markdown_scraper_graph.py b/scrapegraphai/graphs/markdown_scraper_graph.py index 66b161dc..c177facd 100644 --- a/scrapegraphai/graphs/markdown_scraper_graph.py +++ b/scrapegraphai/graphs/markdown_scraper_graph.py @@ -3,7 +3,7 @@ import logging from pydantic import BaseModel from .base_graph import BaseGraph from .abstract_graph import AbstractGraph -from ..nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode +from ..nodes import FetchNode, ParseNode, GenerateAnswerNode class MDScraperGraph(AbstractGraph): """ @@ -63,14 +63,6 @@ class MDScraperGraph(AbstractGraph): "chunk_size": self.model_token } ) - rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model - } - ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -86,13 +78,11 @@ class MDScraperGraph(AbstractGraph): nodes=[ fetch_node, parse_node, - rag_node, generate_answer_node, ], edges=[ (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node) + (parse_node, generate_answer_node) ], entry_point=fetch_node, graph_name=self.__class__.__name__ diff --git a/scrapegraphai/graphs/omni_scraper_graph.py b/scrapegraphai/graphs/omni_scraper_graph.py index 7e34dab7..1965dc04 100644 --- a/scrapegraphai/graphs/omni_scraper_graph.py +++ b/scrapegraphai/graphs/omni_scraper_graph.py @@ -12,7 +12,6 @@ from ..nodes import ( FetchNode, ParseNode, ImageToTextNode, - RAGNode, GenerateAnswerOmniNode ) @@ -89,14 +88,7 @@ class OmniScraperGraph(AbstractGraph): "max_images": self.max_images } ) - rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model - } - ) + generate_answer_omni_node = GenerateAnswerOmniNode( input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc", output=["answer"], @@ -112,14 +104,12 @@ class OmniScraperGraph(AbstractGraph): fetch_node, parse_node, image_to_text_node, - rag_node, generate_answer_omni_node, ], edges=[ (fetch_node, parse_node), (parse_node, image_to_text_node), - (image_to_text_node, rag_node), - (rag_node, generate_answer_omni_node) + (image_to_text_node, generate_answer_omni_node) ], entry_point=fetch_node, graph_name=self.__class__.__name__ @@ -136,4 +126,4 @@ class OmniScraperGraph(AbstractGraph): inputs = {"user_prompt": self.prompt, self.input_key: self.source} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") \ No newline at end of file + return self.final_state.get("answer", "No answer found.") diff --git a/scrapegraphai/graphs/pdf_scraper_graph.py b/scrapegraphai/graphs/pdf_scraper_graph.py index 732b4789..049425d0 100644 --- a/scrapegraphai/graphs/pdf_scraper_graph.py +++ b/scrapegraphai/graphs/pdf_scraper_graph.py @@ -12,7 +12,6 @@ from .abstract_graph import AbstractGraph from ..nodes import ( FetchNode, ParseNode, - RAGNode, GenerateAnswerPDFNode ) @@ -76,14 +75,6 @@ class PDFScraperGraph(AbstractGraph): } ) - rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model - } - ) generate_answer_node_pdf = GenerateAnswerPDFNode( input="user_prompt & (relevant_chunks | doc)", output=["answer"], @@ -98,13 +89,11 @@ class PDFScraperGraph(AbstractGraph): nodes=[ fetch_node, parse_node, - rag_node, generate_answer_node_pdf, ], edges=[ (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node_pdf) + (parse_node, generate_answer_node_pdf) ], entry_point=fetch_node, graph_name=self.__class__.__name__ diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index ba27b60e..7862f88f 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -78,14 +78,7 @@ class SmartScraperGraph(AbstractGraph): "chunk_size": self.model_token } ) - rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model - } - ) + generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -100,13 +93,11 @@ class SmartScraperGraph(AbstractGraph): nodes=[ fetch_node, parse_node, - rag_node, generate_answer_node, ], edges=[ (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node) + (parse_node, generate_answer_node) ], entry_point=fetch_node, graph_name=self.__class__.__name__ diff --git a/scrapegraphai/graphs/xml_scraper_graph.py b/scrapegraphai/graphs/xml_scraper_graph.py index 28c58bb2..24b1ff0d 100644 --- a/scrapegraphai/graphs/xml_scraper_graph.py +++ b/scrapegraphai/graphs/xml_scraper_graph.py @@ -10,7 +10,6 @@ from .abstract_graph import AbstractGraph from ..nodes import ( FetchNode, - RAGNode, GenerateAnswerNode ) @@ -64,14 +63,7 @@ class XMLScraperGraph(AbstractGraph): input="xml | xml_dir", output=["doc", "link_urls", "img_urls"] ) - rag_node = RAGNode( - input="user_prompt & doc", - output=["relevant_chunks"], - node_config={ - "llm_model": self.llm_model, - "embedder_model": self.embedder_model - } - ) + generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | doc)", output=["answer"], @@ -85,12 +77,10 @@ class XMLScraperGraph(AbstractGraph): return BaseGraph( nodes=[ fetch_node, - rag_node, generate_answer_node, ], edges=[ - (fetch_node, rag_node), - (rag_node, generate_answer_node) + (fetch_node, generate_answer_node) ], entry_point=fetch_node, graph_name=self.__class__.__name__ diff --git a/scrapegraphai/nodes/generate_answer_csv_node.py b/scrapegraphai/nodes/generate_answer_csv_node.py index 58adb1d4..6008dbdd 100644 --- a/scrapegraphai/nodes/generate_answer_csv_node.py +++ b/scrapegraphai/nodes/generate_answer_csv_node.py @@ -125,7 +125,7 @@ class GenerateAnswerCSVNode(BaseNode): template=template_no_chunks_csv_prompt, input_variables=["question"], partial_variables={ - "context": chunk.page_content, + "context": chunk, "format_instructions": format_instructions, }, ) @@ -137,7 +137,7 @@ class GenerateAnswerCSVNode(BaseNode): template=template_chunks_csv_prompt, input_variables=["question"], partial_variables={ - "context": chunk.page_content, + "context": chunk, "chunk_id": i + 1, "format_instructions": format_instructions, }, diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index fabb4e66..3ea8a128 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -115,7 +115,7 @@ class GenerateAnswerNode(BaseNode): prompt = PromptTemplate( template=template_no_chunks_prompt, input_variables=["question"], - partial_variables={"context": chunk.page_content, + partial_variables={"context": chunk, "format_instructions": format_instructions}) chain = prompt | self.llm_model | output_parser answer = chain.invoke({"question": user_prompt}) @@ -124,7 +124,7 @@ class GenerateAnswerNode(BaseNode): prompt = PromptTemplate( template=template_chunks_prompt, input_variables=["question"], - partial_variables={"context": chunk.page_content, + partial_variables={"context": chunk, "chunk_id": i + 1, "format_instructions": format_instructions}) # Dynamically name the chains based on their index diff --git a/scrapegraphai/nodes/generate_answer_omni_node.py b/scrapegraphai/nodes/generate_answer_omni_node.py index e6ea9206..f5474177 100644 --- a/scrapegraphai/nodes/generate_answer_omni_node.py +++ b/scrapegraphai/nodes/generate_answer_omni_node.py @@ -110,7 +110,7 @@ class GenerateAnswerOmniNode(BaseNode): template=template_no_chunk_omni_prompt, input_variables=["question"], partial_variables={ - "context": chunk.page_content, + "context": chunk, "format_instructions": format_instructions, "img_desc": imag_desc, }, @@ -123,7 +123,7 @@ class GenerateAnswerOmniNode(BaseNode): template=template_chunks_omni_prompt, input_variables=["question"], partial_variables={ - "context": chunk.page_content, + "context": chunk, "chunk_id": i + 1, "format_instructions": format_instructions, }, diff --git a/scrapegraphai/nodes/generate_answer_pdf_node.py b/scrapegraphai/nodes/generate_answer_pdf_node.py index c6509f34..fac25c06 100644 --- a/scrapegraphai/nodes/generate_answer_pdf_node.py +++ b/scrapegraphai/nodes/generate_answer_pdf_node.py @@ -124,7 +124,7 @@ class GenerateAnswerPDFNode(BaseNode): template=template_no_chunks_pdf_prompt, input_variables=["question"], partial_variables={ - "context":chunk.page_content, + "context":chunk, "format_instructions": format_instructions, }, )