mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
add integration on the fetch node
This commit is contained in:
parent
9661c77ebe
commit
4233430518
@@ -6,6 +6,7 @@ from typing import List
|
||||
from langchain_community.document_loaders import AsyncHtmlLoader
|
||||
from langchain_core.documents import Document
|
||||
from .base_node import BaseNode
|
||||
+from ..utils.remover import remover
|
||||
|
||||
|
||||
class FetchNode(BaseNode):
|
||||
@@ -71,7 +72,7 @@ class FetchNode(BaseNode):
|
||||
|
||||
# if it is a local directory
|
||||
if not source.startswith("http"):
|
||||
-document = [Document(page_content=source, metadata={
|
||||
+document = [Document(page_content=remover(source), metadata={
|
||||
"source": "local_dir"
|
||||
})]
|
||||
|
||||
@@ -79,6 +80,5 @@ class FetchNode(BaseNode):
|
||||
else:
|
||||
loader = AsyncHtmlLoader(source)
|
||||
document = loader.load()
|
||||
|
||||
state.update({self.output[0]: document})
|
||||
return state
|
||||
|
||||
Loading…
Reference in New Issue
Block a user