mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
fix(fetch-node): changed the useSoup default from True to False
This commit is contained in:
parent
353382b4d3
commit
0c1594737f
@ -17,7 +17,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-3.5-turbo",
|
||||
"model": "gpt-4-turbo",
|
||||
},
|
||||
"max_results": 2,
|
||||
"verbose": True,
|
||||
@ -28,7 +28,7 @@ graph_config = {
|
||||
# ************************************************
|
||||
|
||||
search_graph = SearchGraph(
|
||||
prompt="List me the best escursions near Trento",
|
||||
prompt="List me the heir of the British throne.",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
|
||||
@ -30,7 +30,7 @@ graph_config = {
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
prompt="List me all the links in the page",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
config=graph_config
|
||||
|
||||
@ -51,7 +51,8 @@ class FetchNode(BaseNode):
|
||||
False if node_config is None else node_config.get("verbose", False)
|
||||
)
|
||||
self.useSoup = (
|
||||
True if node_config is None else node_config.get("useSoup", True)
|
||||
False if node_config is None else node_config.get("useSoup", False)
|
||||
)
|
||||
self.loader_kwargs = (
|
||||
{} if node_config is None else node_config.get("loader_kwargs", {})
|
||||
)
|
||||
@ -117,7 +118,7 @@ class FetchNode(BaseNode):
|
||||
pass
|
||||
|
||||
elif not source.startswith("http"):
|
||||
compressed_document = [Document(page_content=cleanup_html(source),
|
||||
compressed_document = [Document(page_content=cleanup_html(data, source),
|
||||
metadata={"source": "local_dir"}
|
||||
)]
|
||||
|
||||
@ -127,7 +128,7 @@ class FetchNode(BaseNode):
|
||||
cleanedup_html = cleanup_html(response.text, source)
|
||||
compressed_document = [Document(page_content=cleanedup_html)]
|
||||
else:
|
||||
print(f"Failed to retrieve contents from the webpage at url: {url}")
|
||||
print(f"Failed to retrieve contents from the webpage at url: {source}")
|
||||
|
||||
else:
|
||||
loader_kwargs = {}
|
||||
@ -139,7 +140,7 @@ class FetchNode(BaseNode):
|
||||
|
||||
document = loader.load()
|
||||
compressed_document = [
|
||||
Document(page_content=cleanup_html(str(document[0].page_content)))
|
||||
Document(page_content=cleanup_html(str(document[0].page_content), source), metadata={"source": source})
|
||||
]
|
||||
|
||||
state.update({self.output[0]: compressed_document})
|
||||
|
||||
Loading…
Reference in New Issue
Block a user