From f01b55e89b1365760f0dce4fa15ac0e74d280c57 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Sat, 10 Aug 2024 11:41:51 +0200 Subject: [PATCH] fix: fetch node --- requirements-dev.lock | 36 ------------------------------- requirements.lock | 34 ----------------------------- scrapegraphai/nodes/fetch_node.py | 10 ++++----- 3 files changed, 5 insertions(+), 75 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index 39f2747d..c8620876 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,8 +6,6 @@ # features: [] # all-features: false # with-sources: false -# generate-hashes: false -# universal: false -e file:. aiofiles==24.1.0 @@ -112,7 +110,6 @@ filelock==3.15.4 # via huggingface-hub # via torch # via transformers - # via triton fireworks-ai==0.14.0 # via langchain-fireworks fonttools==4.53.1 @@ -188,7 +185,6 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright - # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 @@ -362,34 +358,6 @@ numpy==1.26.4 # via shapely # via streamlit # via transformers -nvidia-cublas-cu12==12.1.3.1 - # via nvidia-cudnn-cu12 - # via nvidia-cusolver-cu12 - # via torch -nvidia-cuda-cupti-cu12==12.1.105 - # via torch -nvidia-cuda-nvrtc-cu12==12.1.105 - # via torch -nvidia-cuda-runtime-cu12==12.1.105 - # via torch -nvidia-cudnn-cu12==8.9.2.26 - # via torch -nvidia-cufft-cu12==11.0.2.54 - # via torch -nvidia-curand-cu12==10.3.2.106 - # via torch -nvidia-cusolver-cu12==11.4.5.107 - # via torch -nvidia-cusparse-cu12==12.1.0.106 - # via nvidia-cusolver-cu12 - # via torch -nvidia-nccl-cu12==2.19.3 - # via torch -nvidia-nvjitlink-cu12==12.6.20 - # via nvidia-cusolver-cu12 - # via nvidia-cusparse-cu12 -nvidia-nvtx-cu12==12.1.105 - # via torch openai==1.37.0 # via burr # via langchain-fireworks @@ -631,8 +599,6 @@ tqdm==4.66.4 transformers==4.43.3 # via langchain-huggingface # via sentence-transformers -triton==2.2.0 - # via torch typer==0.12.3 # via fastapi-cli typing-extensions==4.12.2 @@ -676,8 +642,6 @@ uvicorn==0.30.3 # via fastapi uvloop==0.19.0 # via uvicorn -watchdog==4.0.1 - # via streamlit watchfiles==0.22.0 # via uvicorn websockets==12.0 diff --git a/requirements.lock b/requirements.lock index 7957082f..c5cdc85f 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,8 +6,6 @@ # features: [] # all-features: false # with-sources: false -# generate-hashes: false -# universal: false -e file:. aiohttp==3.9.5 @@ -69,7 +67,6 @@ filelock==3.15.4 # via huggingface-hub # via torch # via transformers - # via triton fireworks-ai==0.14.0 # via langchain-fireworks free-proxy==1.1.1 @@ -136,7 +133,6 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright - # via sqlalchemy groq==0.9.0 # via langchain-groq grpc-google-iam-v1==0.13.1 @@ -267,34 +263,6 @@ numpy==1.26.4 # via sentence-transformers # via shapely # via transformers -nvidia-cublas-cu12==12.1.3.1 - # via nvidia-cudnn-cu12 - # via nvidia-cusolver-cu12 - # via torch -nvidia-cuda-cupti-cu12==12.1.105 - # via torch -nvidia-cuda-nvrtc-cu12==12.1.105 - # via torch -nvidia-cuda-runtime-cu12==12.1.105 - # via torch -nvidia-cudnn-cu12==8.9.2.26 - # via torch -nvidia-cufft-cu12==11.0.2.54 - # via torch -nvidia-curand-cu12==10.3.2.106 - # via torch -nvidia-cusolver-cu12==11.4.5.107 - # via torch -nvidia-cusparse-cu12==12.1.0.106 - # via nvidia-cusolver-cu12 - # via torch -nvidia-nccl-cu12==2.19.3 - # via torch -nvidia-nvjitlink-cu12==12.6.20 - # via nvidia-cusolver-cu12 - # via nvidia-cusparse-cu12 -nvidia-nvtx-cu12==12.1.105 - # via torch openai==1.37.0 # via langchain-fireworks # via langchain-openai @@ -446,8 +414,6 @@ tqdm==4.66.4 transformers==4.43.3 # via langchain-huggingface # via sentence-transformers -triton==2.2.0 - # via torch typing-extensions==4.12.2 # via anthropic # via anyio diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 08e44e0c..a0514f37 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -260,7 +260,7 @@ class FetchNode(BaseNode): if (isinstance(self.llm_model, ChatOpenAI) and not self.script_creator) or (self.force and not self.script_creator): - parsed_content = convert_to_md(source, input_data[0]) + parsed_content = convert_to_md(source, parsed_content) compressed_document = [Document(page_content=parsed_content)] else: @@ -288,14 +288,14 @@ class FetchNode(BaseNode): parsed_content = document[0].page_content if isinstance(self.llm_model, ChatOpenAI) and not self.script_creator or self.force and not self.script_creator and not self.openai_md_enabled: - parsed_content = convert_to_md(document[0].page_content, input_data[0]) + parsed_content = convert_to_md(document[0].page_content, parsed_content) compressed_document = [ Document(page_content=parsed_content, metadata={"source": "html file"}) ] return self.update_state(state, compressed_document) - + def update_state(self, state, compressed_document): """ Updates the state with the output data from the node. @@ -308,6 +308,6 @@ class FetchNode(BaseNode): Returns: dict: The updated state with the output data. """ - + state.update({self.output[0]: compressed_document,}) - return state \ No newline at end of file + return state