mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-01 21:00:48 +08:00
fix: node refiner + examples
This commit is contained in:
parent
bb375cd12e
commit
d55f6bee47
@ -3,13 +3,12 @@ Basic example of scraping pipeline using ScriptCreatorGraph
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
|
||||
@ -6,8 +6,6 @@
|
||||
# features: []
|
||||
# all-features: false
|
||||
# with-sources: false
|
||||
# generate-hashes: false
|
||||
# universal: false
|
||||
|
||||
-e file:.
|
||||
aiofiles==24.1.0
|
||||
@ -131,7 +129,6 @@ graphviz==0.20.3
|
||||
# via burr
|
||||
greenlet==3.0.3
|
||||
# via playwright
|
||||
# via sqlalchemy
|
||||
grpcio==1.65.4
|
||||
# via google-api-core
|
||||
# via grpcio-status
|
||||
@ -501,7 +498,5 @@ urllib3==1.26.19
|
||||
# via requests
|
||||
uvicorn==0.30.5
|
||||
# via burr
|
||||
watchdog==4.0.2
|
||||
# via streamlit
|
||||
yarl==1.9.4
|
||||
# via aiohttp
|
||||
|
||||
@ -6,8 +6,6 @@
|
||||
# features: []
|
||||
# all-features: false
|
||||
# with-sources: false
|
||||
# generate-hashes: false
|
||||
# universal: false
|
||||
|
||||
-e file:.
|
||||
aiohttp==3.9.5
|
||||
@ -86,7 +84,6 @@ googleapis-common-protos==1.63.2
|
||||
# via grpcio-status
|
||||
greenlet==3.0.3
|
||||
# via playwright
|
||||
# via sqlalchemy
|
||||
grpcio==1.65.1
|
||||
# via google-api-core
|
||||
# via grpcio-status
|
||||
|
||||
@ -6,6 +6,7 @@ import logging
|
||||
from pydantic import BaseModel
|
||||
from .base_graph import BaseGraph
|
||||
from .abstract_graph import AbstractGraph
|
||||
from ..utils.save_code_to_file import save_code_to_file
|
||||
from ..nodes import (
|
||||
FetchNode,
|
||||
ParseNode,
|
||||
@ -172,17 +173,6 @@ class CodeGeneratorGraph(AbstractGraph):
|
||||
else:
|
||||
filename = self.config.get("filename")
|
||||
|
||||
self.save_code_to_file(generated_code, filename)
|
||||
save_code_to_file(generated_code, filename)
|
||||
|
||||
return generated_code
|
||||
|
||||
def save_code_to_file(self, code: str, filename: str) -> None:
    """
    Saves the generated code to a Python file.

    The file is created if missing and overwritten if it already exists.

    Args:
        code (str): The generated code to be saved.
        filename (str): name of the output file
    """
    # Python source files are UTF-8 by default; pin the encoding so the
    # output does not depend on the platform's locale (e.g. cp1252 on
    # Windows), which could fail or corrupt non-ASCII characters.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(code)
|
||||
|
||||
@ -73,18 +73,15 @@ class HtmlAnalyzerNode(BaseNode):
|
||||
KeyError: If the input keys are not found in the state, indicating
|
||||
that the necessary information for generating an answer is missing.
|
||||
"""
|
||||
|
||||
self.logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
input_data = [state[key] for key in input_keys]
|
||||
refined_prompt = input_data[0] # get refined user prompt
|
||||
html = input_data[1] # get HTML code
|
||||
|
||||
reduced_html = reduce_html(html[0].page_content, self.node_config.get("reduction", 0)) # reduce HTML code
|
||||
|
||||
if self.additional_info is not None: # use additional context if present
|
||||
refined_prompt = input_data[0]
|
||||
html = input_data[1]
|
||||
reduced_html = reduce_html(html[0].page_content, self.node_config.get("reduction", 0))
|
||||
|
||||
if self.additional_info is not None:
|
||||
prompt = PromptTemplate(
|
||||
template=TEMPLATE_HTML_ANALYSIS_WITH_CONTEXT,
|
||||
partial_variables={"initial_analysis": refined_prompt,
|
||||
@ -103,4 +100,3 @@ class HtmlAnalyzerNode(BaseNode):
|
||||
|
||||
state.update({self.output[0]: html_analysis, self.output[1]: reduced_html})
|
||||
return state
|
||||
|
||||
|
||||
@ -79,7 +79,7 @@ class PromptRefinerNode(BaseNode):
|
||||
KeyError: If the input keys are not found in the state, indicating
|
||||
that the necessary information for generating an answer is missing.
|
||||
"""
|
||||
|
||||
|
||||
self.logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
user_prompt = state['user_prompt']
|
||||
|
||||
@ -23,5 +23,8 @@ from .cleanup_code import extract_code
|
||||
from .dict_content_compare import are_content_equal
|
||||
from .code_error_analysis import (syntax_focused_analysis, execution_focused_analysis,
|
||||
validation_focused_analysis, semantic_focused_analysis)
|
||||
from .code_error_correction import (syntax_focused_code_generation, execution_focused_code_generation,
|
||||
validation_focused_code_generation, semantic_focused_code_generation)
|
||||
from .code_error_correction import (syntax_focused_code_generation,
|
||||
execution_focused_code_generation,
|
||||
validation_focused_code_generation,
|
||||
semantic_focused_code_generation)
|
||||
from .save_code_to_file import save_code_to_file
|
||||
|
||||
14
scrapegraphai/utils/save_code_to_file.py
Normal file
14
scrapegraphai/utils/save_code_to_file.py
Normal file
@ -0,0 +1,14 @@
|
||||
"""
|
||||
save_code_to_file module
|
||||
"""
|
||||
|
||||
def save_code_to_file(code: str, filename: str) -> None:
    """
    Saves the generated code to a Python file.

    The file is created if missing and overwritten if it already exists.

    Args:
        code (str): The generated code to be saved.
        filename (str): name of the output file
    """
    # Python source files are UTF-8 by default; pin the encoding so the
    # output does not depend on the platform's locale (e.g. cp1252 on
    # Windows), which could fail or corrupt non-ASCII characters.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(code)
|
||||
Loading…
Reference in New Issue
Block a user