diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 12f69240..39a0b55f 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -6,6 +6,7 @@ from typing import List from langchain_community.document_loaders import AsyncHtmlLoader from langchain_core.documents import Document from .base_node import BaseNode +from ..utils.remover import remover class FetchNode(BaseNode): @@ -71,7 +72,7 @@ class FetchNode(BaseNode): # if it is a local directory if not source.startswith("http"): - document = [Document(page_content=source, metadata={ + document = [Document(page_content=remover(source), metadata={ "source": "local_dir" })] @@ -79,6 +80,5 @@ class FetchNode(BaseNode): else: loader = AsyncHtmlLoader(source) document = loader.load() - state.update({self.output[0]: document}) return state