feat: add conditional node structure to the smart_scraper_graph and implement a structured way to check conditions

This commit is contained in:
ekinsenler 2024-10-15 14:23:02 +03:00
parent 931b975d79
commit cacd9cde00
4 changed files with 91 additions and 58 deletions

View File

@ -18,4 +18,5 @@ undetected-playwright>=0.3.0
semchunk>=1.0.1
langchain-ollama>=0.1.3
simpleeval>=0.9.13
googlesearch-python>=1.2.5
googlesearch-python>=1.2.5
async_timeout>=4.0.3

View File

@ -2,7 +2,6 @@
SmartScraperGraph Module
"""
from typing import Optional
import logging
from pydantic import BaseModel
from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
@ -10,8 +9,10 @@ from ..nodes import (
FetchNode,
ParseNode,
ReasoningNode,
GenerateAnswerNode
GenerateAnswerNode,
ConditionalNode
)
from ..prompts import REGEN_ADDITIONAL_INFO
class SmartScraperGraph(AbstractGraph):
"""
@ -89,6 +90,28 @@ class SmartScraperGraph(AbstractGraph):
}
)
cond_node = None
regen_node = None
if self.config.get("reattempt") is True:
cond_node = ConditionalNode(
input="results",
output=["results"],
node_name="ConditionalNode",
node_config={
"key_name": "results",
"condition": 'results and results!="NA"',
}
)
regen_node = GenerateAnswerNode(
input="user_prompt & results",
output=["answer"],
node_config={
"llm_model": self.llm_model,
"additional_info": REGEN_ADDITIONAL_INFO,
"schema": self.schema,
}
)
if self.config.get("html_mode") is False:
parse_node = ParseNode(
input="doc",
@ -99,6 +122,7 @@ class SmartScraperGraph(AbstractGraph):
}
)
reasoning_node = None
if self.config.get("reasoning"):
reasoning_node = ReasoningNode(
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
@ -109,68 +133,72 @@ class SmartScraperGraph(AbstractGraph):
"schema": self.schema,
}
)
# Define the graph variation configurations
# (html_mode, reasoning, reattempt)
graph_variation_config = {
(False, True, False): {
"nodes": [fetch_node, parse_node, reasoning_node, generate_answer_node],
"edges": [(fetch_node, parse_node), (parse_node, reasoning_node), (reasoning_node, generate_answer_node)]
},
(True, True, False): {
"nodes": [fetch_node, reasoning_node, generate_answer_node],
"edges": [(fetch_node, reasoning_node), (reasoning_node, generate_answer_node)]
},
(True, False, False): {
"nodes": [fetch_node, generate_answer_node],
"edges": [(fetch_node, generate_answer_node)]
},
(False, False, False): {
"nodes": [fetch_node, parse_node, generate_answer_node],
"edges": [(fetch_node, parse_node), (parse_node, generate_answer_node)]
},
(False, True, True): {
"nodes": [fetch_node, parse_node, reasoning_node, generate_answer_node, cond_node, regen_node],
"edges": [(fetch_node, parse_node), (parse_node, reasoning_node), (reasoning_node, generate_answer_node),
(generate_answer_node, cond_node), (cond_node, regen_node), (cond_node, None)]
},
(True, True, True): {
"nodes": [fetch_node, reasoning_node, generate_answer_node, cond_node, regen_node],
"edges": [(fetch_node, reasoning_node), (reasoning_node, generate_answer_node),
(generate_answer_node, cond_node), (cond_node, regen_node), (cond_node, None)]
},
(True, False, True): {
"nodes": [fetch_node, generate_answer_node, cond_node, regen_node],
"edges": [(fetch_node, generate_answer_node), (generate_answer_node, cond_node),
(cond_node, regen_node), (cond_node, None)]
},
(False, False, True): {
"nodes": [fetch_node, parse_node, generate_answer_node, cond_node, regen_node],
"edges": [(fetch_node, parse_node), (parse_node, generate_answer_node),
(generate_answer_node, cond_node), (cond_node, regen_node), (cond_node, None)]
}
}
if self.config.get("html_mode") is False and self.config.get("reasoning") is True:
# Get the current conditions
html_mode = self.config.get("html_mode", False)
reasoning = self.config.get("reasoning", False)
reattempt = self.config.get("reattempt", False)
# Retrieve the appropriate graph configuration
config = graph_variation_config.get((html_mode, reasoning, reattempt))
if config:
return BaseGraph(
nodes=[
fetch_node,
parse_node,
reasoning_node,
generate_answer_node,
],
edges=[
(fetch_node, parse_node),
(parse_node, reasoning_node),
(reasoning_node, generate_answer_node)
],
entry_point=fetch_node,
graph_name=self.__class__.__name__
)
elif self.config.get("html_mode") is True and self.config.get("reasoning") is True:
return BaseGraph(
nodes=[
fetch_node,
reasoning_node,
generate_answer_node,
],
edges=[
(fetch_node, reasoning_node),
(reasoning_node, generate_answer_node)
],
entry_point=fetch_node,
graph_name=self.__class__.__name__
)
elif self.config.get("html_mode") is True and self.config.get("reasoning") is False:
return BaseGraph(
nodes=[
fetch_node,
generate_answer_node,
],
edges=[
(fetch_node, generate_answer_node)
],
nodes=config["nodes"],
edges=config["edges"],
entry_point=fetch_node,
graph_name=self.__class__.__name__
)
# Default return if no conditions match
return BaseGraph(
nodes=[
fetch_node,
parse_node,
generate_answer_node,
],
edges=[
(fetch_node, parse_node),
(parse_node, generate_answer_node)
],
entry_point=fetch_node,
graph_name=self.__class__.__name__
)
nodes=[fetch_node, parse_node, generate_answer_node],
edges=[(fetch_node, parse_node), (parse_node, generate_answer_node)],
entry_point=fetch_node,
graph_name=self.__class__.__name__
)
def run(self) -> str:
"""
Executes the scraping process and returns the answer to the prompt.

View File

@ -5,7 +5,7 @@ __init__.py for the prompts folder
from .generate_answer_node_prompts import (TEMPLATE_CHUNKS,
TEMPLATE_NO_CHUNKS,
TEMPLATE_MERGE, TEMPLATE_CHUNKS_MD,
TEMPLATE_NO_CHUNKS_MD, TEMPLATE_MERGE_MD)
TEMPLATE_NO_CHUNKS_MD, TEMPLATE_MERGE_MD, REGEN_ADDITIONAL_INFO)
from .generate_answer_node_csv_prompts import (TEMPLATE_CHUKS_CSV,
TEMPLATE_NO_CHUKS_CSV,
TEMPLATE_MERGE_CSV)

View File

@ -86,3 +86,7 @@ OUTPUT INSTRUCTIONS: {format_instructions}\n
USER QUESTION: {question}\n
WEBSITE CONTENT: {context}\n
"""
# Extra prompt text for a retry after a failed extraction: it tells the model
# that the previous scrape attempt failed and asks it to try again and supply
# what was missing. Built from adjacent literals so each newline is explicit.
REGEN_ADDITIONAL_INFO = (
    "\n"
    "You are a scraper and you have just failed to scrape the requested "
    "information from a website. \n\n"
    "I want you to try again and provide the missing informations. \n"
)