fix(fetch-node): disabled useSoup by default

This commit is contained in:
Marco Perini 2024-05-13 12:09:55 +02:00
parent 353382b4d3
commit 0c1594737f
3 changed files with 8 additions and 7 deletions

View File

@ -17,7 +17,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
graph_config = {
"llm": {
"api_key": openai_key,
"model": "gpt-3.5-turbo",
"model": "gpt-4-turbo",
},
"max_results": 2,
"verbose": True,
@ -28,7 +28,7 @@ graph_config = {
# ************************************************
search_graph = SearchGraph(
prompt="List me the best escursions near Trento",
prompt="List me the heir of the British throne.",
config=graph_config
)

View File

@ -30,7 +30,7 @@ graph_config = {
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description.",
prompt="List me all the links in the page",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
config=graph_config

View File

@ -51,7 +51,8 @@ class FetchNode(BaseNode):
False if node_config is None else node_config.get("verbose", False)
)
self.useSoup = (
True if node_config is None else node_config.get("useSoup", True)
False if node_config is None else node_config.get("useSoup", False)
)
self.loader_kwargs = (
{} if node_config is None else node_config.get("loader_kwargs", {})
)
@ -117,7 +118,7 @@ class FetchNode(BaseNode):
pass
elif not source.startswith("http"):
compressed_document = [Document(page_content=cleanup_html(source),
compressed_document = [Document(page_content=cleanup_html(data, source),
metadata={"source": "local_dir"}
)]
@ -127,7 +128,7 @@ class FetchNode(BaseNode):
cleanedup_html = cleanup_html(response.text, source)
compressed_document = [Document(page_content=cleanedup_html)]
else:
print(f"Failed to retrieve contents from the webpage at url: {url}")
print(f"Failed to retrieve contents from the webpage at url: {source}")
else:
loader_kwargs = {}
@ -139,7 +140,7 @@ class FetchNode(BaseNode):
document = loader.load()
compressed_document = [
Document(page_content=cleanup_html(str(document[0].page_content)))
Document(page_content=cleanup_html(str(document[0].page_content), source), metadata={"source": source})
]
state.update({self.output[0]: compressed_document})