mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-04 21:00:36 +08:00
fix: node refiner + examples
This commit is contained in:
parent
bb375cd12e
commit
d55f6bee47
@ -3,13 +3,12 @@ Basic example of scraping pipeline using ScriptCreatorGraph
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
from typing import List
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||||
from scrapegraphai.utils import prettify_exec_info
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
from pydantic import BaseModel, Field
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# ************************************************
|
# ************************************************
|
||||||
|
|||||||
@ -6,8 +6,6 @@
|
|||||||
# features: []
|
# features: []
|
||||||
# all-features: false
|
# all-features: false
|
||||||
# with-sources: false
|
# with-sources: false
|
||||||
# generate-hashes: false
|
|
||||||
# universal: false
|
|
||||||
|
|
||||||
-e file:.
|
-e file:.
|
||||||
aiofiles==24.1.0
|
aiofiles==24.1.0
|
||||||
@ -131,7 +129,6 @@ graphviz==0.20.3
|
|||||||
# via burr
|
# via burr
|
||||||
greenlet==3.0.3
|
greenlet==3.0.3
|
||||||
# via playwright
|
# via playwright
|
||||||
# via sqlalchemy
|
|
||||||
grpcio==1.65.4
|
grpcio==1.65.4
|
||||||
# via google-api-core
|
# via google-api-core
|
||||||
# via grpcio-status
|
# via grpcio-status
|
||||||
@ -501,7 +498,5 @@ urllib3==1.26.19
|
|||||||
# via requests
|
# via requests
|
||||||
uvicorn==0.30.5
|
uvicorn==0.30.5
|
||||||
# via burr
|
# via burr
|
||||||
watchdog==4.0.2
|
|
||||||
# via streamlit
|
|
||||||
yarl==1.9.4
|
yarl==1.9.4
|
||||||
# via aiohttp
|
# via aiohttp
|
||||||
|
|||||||
@ -6,8 +6,6 @@
|
|||||||
# features: []
|
# features: []
|
||||||
# all-features: false
|
# all-features: false
|
||||||
# with-sources: false
|
# with-sources: false
|
||||||
# generate-hashes: false
|
|
||||||
# universal: false
|
|
||||||
|
|
||||||
-e file:.
|
-e file:.
|
||||||
aiohttp==3.9.5
|
aiohttp==3.9.5
|
||||||
@ -86,7 +84,6 @@ googleapis-common-protos==1.63.2
|
|||||||
# via grpcio-status
|
# via grpcio-status
|
||||||
greenlet==3.0.3
|
greenlet==3.0.3
|
||||||
# via playwright
|
# via playwright
|
||||||
# via sqlalchemy
|
|
||||||
grpcio==1.65.1
|
grpcio==1.65.1
|
||||||
# via google-api-core
|
# via google-api-core
|
||||||
# via grpcio-status
|
# via grpcio-status
|
||||||
|
|||||||
@ -6,6 +6,7 @@ import logging
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
from .abstract_graph import AbstractGraph
|
from .abstract_graph import AbstractGraph
|
||||||
|
from ..utils.save_code_to_file import save_code_to_file
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
@ -172,17 +173,6 @@ class CodeGeneratorGraph(AbstractGraph):
|
|||||||
else:
|
else:
|
||||||
filename = self.config.get("filename")
|
filename = self.config.get("filename")
|
||||||
|
|
||||||
self.save_code_to_file(generated_code, filename)
|
save_code_to_file(generated_code, filename)
|
||||||
|
|
||||||
return generated_code
|
return generated_code
|
||||||
|
|
||||||
def save_code_to_file(self, code: str, filename:str) -> None:
|
|
||||||
"""
|
|
||||||
Saves the generated code to a Python file.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
code (str): The generated code to be saved.
|
|
||||||
filename (str): name of the output file
|
|
||||||
"""
|
|
||||||
with open(filename, "w") as file:
|
|
||||||
file.write(code)
|
|
||||||
|
|||||||
@ -73,18 +73,15 @@ class HtmlAnalyzerNode(BaseNode):
|
|||||||
KeyError: If the input keys are not found in the state, indicating
|
KeyError: If the input keys are not found in the state, indicating
|
||||||
that the necessary information for generating an answer is missing.
|
that the necessary information for generating an answer is missing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.logger.info(f"--- Executing {self.node_name} Node ---")
|
self.logger.info(f"--- Executing {self.node_name} Node ---")
|
||||||
|
|
||||||
input_keys = self.get_input_keys(state)
|
input_keys = self.get_input_keys(state)
|
||||||
|
|
||||||
input_data = [state[key] for key in input_keys]
|
input_data = [state[key] for key in input_keys]
|
||||||
refined_prompt = input_data[0] # get refined user prompt
|
refined_prompt = input_data[0]
|
||||||
html = input_data[1] # get HTML code
|
html = input_data[1]
|
||||||
|
reduced_html = reduce_html(html[0].page_content, self.node_config.get("reduction", 0))
|
||||||
|
|
||||||
reduced_html = reduce_html(html[0].page_content, self.node_config.get("reduction", 0)) # reduce HTML code
|
if self.additional_info is not None:
|
||||||
|
|
||||||
if self.additional_info is not None: # use additional context if present
|
|
||||||
prompt = PromptTemplate(
|
prompt = PromptTemplate(
|
||||||
template=TEMPLATE_HTML_ANALYSIS_WITH_CONTEXT,
|
template=TEMPLATE_HTML_ANALYSIS_WITH_CONTEXT,
|
||||||
partial_variables={"initial_analysis": refined_prompt,
|
partial_variables={"initial_analysis": refined_prompt,
|
||||||
@ -103,4 +100,3 @@ class HtmlAnalyzerNode(BaseNode):
|
|||||||
|
|
||||||
state.update({self.output[0]: html_analysis, self.output[1]: reduced_html})
|
state.update({self.output[0]: html_analysis, self.output[1]: reduced_html})
|
||||||
return state
|
return state
|
||||||
|
|
||||||
|
|||||||
@ -23,5 +23,8 @@ from .cleanup_code import extract_code
|
|||||||
from .dict_content_compare import are_content_equal
|
from .dict_content_compare import are_content_equal
|
||||||
from .code_error_analysis import (syntax_focused_analysis, execution_focused_analysis,
|
from .code_error_analysis import (syntax_focused_analysis, execution_focused_analysis,
|
||||||
validation_focused_analysis, semantic_focused_analysis)
|
validation_focused_analysis, semantic_focused_analysis)
|
||||||
from .code_error_correction import (syntax_focused_code_generation, execution_focused_code_generation,
|
from .code_error_correction import (syntax_focused_code_generation,
|
||||||
validation_focused_code_generation, semantic_focused_code_generation)
|
execution_focused_code_generation,
|
||||||
|
validation_focused_code_generation,
|
||||||
|
semantic_focused_code_generation)
|
||||||
|
from .save_code_to_file import save_code_to_file
|
||||||
|
|||||||
14
scrapegraphai/utils/save_code_to_file.py
Normal file
14
scrapegraphai/utils/save_code_to_file.py
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
"""
|
||||||
|
save_code_to_file module
|
||||||
|
"""
|
||||||
|
|
||||||
|
def save_code_to_file(code: str, filename:str) -> None:
|
||||||
|
"""
|
||||||
|
Saves the generated code to a Python file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
code (str): The generated code to be saved.
|
||||||
|
filename (str): name of the output file
|
||||||
|
"""
|
||||||
|
with open(filename, "w") as file:
|
||||||
|
file.write(code)
|
||||||
Loading…
Reference in New Issue
Block a user