From 0ab31c3fdbd56652ed306e60109301f60e8042d3 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Thu, 9 May 2024 21:07:07 +0200 Subject: [PATCH] fix: add json integration --- scrapegraphai/graphs/json_scraper_graph.py | 4 ++-- scrapegraphai/nodes/fetch_node.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scrapegraphai/graphs/json_scraper_graph.py b/scrapegraphai/graphs/json_scraper_graph.py index aec41195..dc341eae 100644 --- a/scrapegraphai/graphs/json_scraper_graph.py +++ b/scrapegraphai/graphs/json_scraper_graph.py @@ -54,7 +54,7 @@ class JSONScraperGraph(AbstractGraph): """ fetch_node = FetchNode( - input="json_dir", + input="json", output=["doc"], ) parse_node = ParseNode( @@ -106,4 +106,4 @@ class JSONScraperGraph(AbstractGraph): inputs = {"user_prompt": self.prompt, self.input_key: self.source} self.final_state, self.execution_info = self.graph.execute(inputs) - return self.final_state.get("answer", "No answer found.") \ No newline at end of file + return self.final_state.get("answer", "No answer found.") diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index dfaf8bb6..52266b42 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -2,6 +2,7 @@ FetchNode Module """ import pandas as pd +import json from typing import List, Optional from langchain_community.document_loaders import AsyncChromiumLoader from langchain_core.documents import Document @@ -75,8 +76,13 @@ class FetchNode(BaseNode): compressed_document = loader.load() elif self.input == "csv": - compressed_document = [Document(page_content=pd.read_csv(source), metadata={ - "source": "xml" + compressed_document = [Document(page_content=str(pd.read_csv(source)), metadata={ + "source": "csv" + })] + elif self.input == "json": + f = open(source) + compressed_document = [Document(page_content=str(json.load(f)), metadata={ + "source": "json" })] elif self.input == "xml": with open(source, 'r', encoding='utf-8') as f: