mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-25 21:11:11 +08:00
fix: pdf scraper bug
This commit is contained in:
parent
2e1c79d960
commit
f2dffe534f
@ -86,13 +86,14 @@ class FetchNode(BaseNode):
|
|||||||
input_keys[0] == "json_dir"
|
input_keys[0] == "json_dir"
|
||||||
or input_keys[0] == "xml_dir"
|
or input_keys[0] == "xml_dir"
|
||||||
or input_keys[0] == "csv_dir"
|
or input_keys[0] == "csv_dir"
|
||||||
|
or input_keys[0] == "pdf_dir"
|
||||||
):
|
):
|
||||||
compressed_document = [
|
compressed_document = [
|
||||||
Document(page_content=source, metadata={"source": "local_dir"})
|
Document(page_content=source, metadata={"source": "local_dir"})
|
||||||
]
|
]
|
||||||
state.update({self.output[0]: compressed_document})
|
state.update({self.output[0]: compressed_document})
|
||||||
return state
|
return state
|
||||||
|
|
||||||
# handling for pdf
|
# handling for pdf
|
||||||
elif input_keys[0] == "pdf":
|
elif input_keys[0] == "pdf":
|
||||||
loader = PyPDFLoader(source)
|
loader = PyPDFLoader(source)
|
||||||
@ -108,7 +109,7 @@ class FetchNode(BaseNode):
|
|||||||
]
|
]
|
||||||
state.update({self.output[0]: compressed_document})
|
state.update({self.output[0]: compressed_document})
|
||||||
return state
|
return state
|
||||||
|
|
||||||
elif input_keys[0] == "json":
|
elif input_keys[0] == "json":
|
||||||
f = open(source)
|
f = open(source)
|
||||||
compressed_document = [
|
compressed_document = [
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user