From d55f6bee4766f174abb2fdcd598542a9ca108a25 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 25 Sep 2024 14:53:51 +0200 Subject: [PATCH] fix: node refiner + examples --- examples/openai/script_generator_schema_openai.py | 5 ++--- requirements-dev.lock | 5 ----- requirements.lock | 3 --- scrapegraphai/graphs/code_generator_graph.py | 14 ++------------ scrapegraphai/nodes/html_analyzer_node.py | 14 +++++--------- scrapegraphai/nodes/prompt_refiner_node.py | 2 +- scrapegraphai/utils/__init__.py | 7 +++++-- scrapegraphai/utils/save_code_to_file.py | 14 ++++++++++++++ 8 files changed, 29 insertions(+), 35 deletions(-) create mode 100644 scrapegraphai/utils/save_code_to_file.py diff --git a/examples/openai/script_generator_schema_openai.py b/examples/openai/script_generator_schema_openai.py index 32d7745a..7611c029 100644 --- a/examples/openai/script_generator_schema_openai.py +++ b/examples/openai/script_generator_schema_openai.py @@ -3,13 +3,12 @@ Basic example of scraping pipeline using ScriptCreatorGraph """ import os +from typing import List from dotenv import load_dotenv +from pydantic import BaseModel, Field from scrapegraphai.graphs import ScriptCreatorGraph from scrapegraphai.utils import prettify_exec_info -from pydantic import BaseModel, Field -from typing import List - load_dotenv() # ************************************************ diff --git a/requirements-dev.lock b/requirements-dev.lock index 0523351a..1d9d469a 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,8 +6,6 @@ # features: [] # all-features: false # with-sources: false -# generate-hashes: false -# universal: false -e file:. aiofiles==24.1.0 @@ -131,7 +129,6 @@ graphviz==0.20.3 # via burr greenlet==3.0.3 # via playwright - # via sqlalchemy grpcio==1.65.4 # via google-api-core # via grpcio-status @@ -501,7 +498,5 @@ urllib3==1.26.19 # via requests uvicorn==0.30.5 # via burr -watchdog==4.0.2 - # via streamlit yarl==1.9.4 # via aiohttp diff --git a/requirements.lock b/requirements.lock index 6ee34ba9..84e25a0f 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,8 +6,6 @@ # features: [] # all-features: false # with-sources: false -# generate-hashes: false -# universal: false -e file:. aiohttp==3.9.5 @@ -86,7 +84,6 @@ googleapis-common-protos==1.63.2 # via grpcio-status greenlet==3.0.3 # via playwright - # via sqlalchemy grpcio==1.65.1 # via google-api-core # via grpcio-status diff --git a/scrapegraphai/graphs/code_generator_graph.py b/scrapegraphai/graphs/code_generator_graph.py index 6dcdf79e..9786dc4f 100644 --- a/scrapegraphai/graphs/code_generator_graph.py +++ b/scrapegraphai/graphs/code_generator_graph.py @@ -6,6 +6,7 @@ import logging from pydantic import BaseModel from .base_graph import BaseGraph from .abstract_graph import AbstractGraph +from ..utils.save_code_to_file import save_code_to_file from ..nodes import ( FetchNode, ParseNode, @@ -172,17 +173,6 @@ class CodeGeneratorGraph(AbstractGraph): else: filename = self.config.get("filename") - self.save_code_to_file(generated_code, filename) + save_code_to_file(generated_code, filename) return generated_code - - def save_code_to_file(self, code: str, filename:str) -> None: - """ - Saves the generated code to a Python file. - - Args: - code (str): The generated code to be saved. - filename (str): name of the output file - """ - with open(filename, "w") as file: - file.write(code) diff --git a/scrapegraphai/nodes/html_analyzer_node.py b/scrapegraphai/nodes/html_analyzer_node.py index d526315c..b07c4040 100644 --- a/scrapegraphai/nodes/html_analyzer_node.py +++ b/scrapegraphai/nodes/html_analyzer_node.py @@ -73,18 +73,15 @@ class HtmlAnalyzerNode(BaseNode): KeyError: If the input keys are not found in the state, indicating that the necessary information for generating an answer is missing. """ - self.logger.info(f"--- Executing {self.node_name} Node ---") input_keys = self.get_input_keys(state) - input_data = [state[key] for key in input_keys] - refined_prompt = input_data[0] # get refined user prompt - html = input_data[1] # get HTML code - - reduced_html = reduce_html(html[0].page_content, self.node_config.get("reduction", 0)) # reduce HTML code - - if self.additional_info is not None: # use additional context if present + refined_prompt = input_data[0] + html = input_data[1] + reduced_html = reduce_html(html[0].page_content, self.node_config.get("reduction", 0)) + + if self.additional_info is not None: prompt = PromptTemplate( template=TEMPLATE_HTML_ANALYSIS_WITH_CONTEXT, partial_variables={"initial_analysis": refined_prompt, @@ -103,4 +100,3 @@ class HtmlAnalyzerNode(BaseNode): state.update({self.output[0]: html_analysis, self.output[1]: reduced_html}) return state - diff --git a/scrapegraphai/nodes/prompt_refiner_node.py b/scrapegraphai/nodes/prompt_refiner_node.py index e6f4579c..dfb62eb6 100644 --- a/scrapegraphai/nodes/prompt_refiner_node.py +++ b/scrapegraphai/nodes/prompt_refiner_node.py @@ -79,7 +79,7 @@ class PromptRefinerNode(BaseNode): KeyError: If the input keys are not found in the state, indicating that the necessary information for generating an answer is missing. """ - + self.logger.info(f"--- Executing {self.node_name} Node ---") user_prompt = state['user_prompt'] diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py index 303150f0..d5badca9 100644 --- a/scrapegraphai/utils/__init__.py +++ b/scrapegraphai/utils/__init__.py @@ -23,5 +23,8 @@ from .cleanup_code import extract_code from .dict_content_compare import are_content_equal from .code_error_analysis import (syntax_focused_analysis, execution_focused_analysis, validation_focused_analysis, semantic_focused_analysis) -from .code_error_correction import (syntax_focused_code_generation, execution_focused_code_generation, - validation_focused_code_generation, semantic_focused_code_generation) \ No newline at end of file +from .code_error_correction import (syntax_focused_code_generation, + execution_focused_code_generation, + validation_focused_code_generation, + semantic_focused_code_generation) +from .save_code_to_file import save_code_to_file diff --git a/scrapegraphai/utils/save_code_to_file.py b/scrapegraphai/utils/save_code_to_file.py new file mode 100644 index 00000000..55e70d8c --- /dev/null +++ b/scrapegraphai/utils/save_code_to_file.py @@ -0,0 +1,14 @@ +""" +save_code_to_file module +""" + +def save_code_to_file(code: str, filename:str) -> None: + """ + Saves the generated code to a Python file. + + Args: + code (str): The generated code to be saved. + filename (str): name of the output file + """ + with open(filename, "w") as file: + file.write(code)