From 42334305186f2eaba56ae60107e3bdecf1e4e09d Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Tue, 16 Apr 2024 12:19:23 +0200 Subject: [PATCH] add integration on the fetch node --- scrapegraphai/nodes/fetch_node.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 12f69240..39a0b55f 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -6,6 +6,7 @@ from typing import List from langchain_community.document_loaders import AsyncHtmlLoader from langchain_core.documents import Document from .base_node import BaseNode +from ..utils.remover import remover class FetchNode(BaseNode): @@ -71,7 +72,7 @@ class FetchNode(BaseNode): # if it is a local directory if not source.startswith("http"): - document = [Document(page_content=source, metadata={ + document = [Document(page_content=remover(source), metadata={ "source": "local_dir" })] @@ -79,6 +80,5 @@ class FetchNode(BaseNode): else: loader = AsyncHtmlLoader(source) document = loader.load() - state.update({self.output[0]: document}) return state