From f2dffe534f51aa83aed5ac491243604a443f4373 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 22 May 2024 11:54:55 +0200 Subject: [PATCH] fix: pdf scraper bug --- scrapegraphai/nodes/fetch_node.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 6528f098..6c9858c9 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -86,13 +86,14 @@ class FetchNode(BaseNode): input_keys[0] == "json_dir" or input_keys[0] == "xml_dir" or input_keys[0] == "csv_dir" + or input_keys[0] == "pdf_dir" ): compressed_document = [ Document(page_content=source, metadata={"source": "local_dir"}) ] state.update({self.output[0]: compressed_document}) return state - + # handling for pdf elif input_keys[0] == "pdf": loader = PyPDFLoader(source) @@ -108,7 +109,7 @@ class FetchNode(BaseNode): ] state.update({self.output[0]: compressed_document}) return state - + elif input_keys[0] == "json": f = open(source) compressed_document = [