fix: pdf scraper bug

This commit is contained in:
Marco Vinciguerra 2024-05-22 11:54:55 +02:00
parent 2e1c79d960
commit f2dffe534f

View File

@@ -86,13 +86,14 @@ class FetchNode(BaseNode):
input_keys[0] == "json_dir" input_keys[0] == "json_dir"
or input_keys[0] == "xml_dir" or input_keys[0] == "xml_dir"
or input_keys[0] == "csv_dir" or input_keys[0] == "csv_dir"
or input_keys[0] == "pdf_dir"
): ):
compressed_document = [ compressed_document = [
Document(page_content=source, metadata={"source": "local_dir"}) Document(page_content=source, metadata={"source": "local_dir"})
] ]
state.update({self.output[0]: compressed_document}) state.update({self.output[0]: compressed_document})
return state return state
# handling for pdf # handling for pdf
elif input_keys[0] == "pdf": elif input_keys[0] == "pdf":
loader = PyPDFLoader(source) loader = PyPDFLoader(source)
@@ -108,7 +109,7 @@ class FetchNode(BaseNode):
] ]
state.update({self.output[0]: compressed_document}) state.update({self.output[0]: compressed_document})
return state return state
elif input_keys[0] == "json": elif input_keys[0] == "json":
f = open(source) f = open(source)
compressed_document = [ compressed_document = [