From 76ce257efb9d9f46c0693472a1fe54b39e4eb1ef Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 25 Sep 2024 15:12:29 +0200 Subject: [PATCH] fix: update to pydantic documentation --- .../code_generator_graph_anthropic.py | 2 +- examples/azure/code_generator_graph_azure.py | 2 +- .../bedrock/code_generator_graph_bedrock.py | 2 +- .../deepseek/code_generator_graph_deepseek.py | 2 +- examples/ernie/code_generator_graph_ernie.py | 2 +- .../code_generator_graph_fireworks.py | 2 +- .../code_generator_graph_gemini.py | 2 +- .../code_generator_graph_vertex.py | 2 +- examples/groq/code_generator_graph_groq.py | 2 +- .../code_generator_graph_huggingfacehub.py | 2 +- .../code_generator_graph_ollama.py | 2 +- .../mistral/code_generator_graph_mistral.py | 2 +- .../moonshot/code_generator_graph_moonshot.py | 2 +- .../nemotron/code_generator_graph_nemotron.py | 2 +- .../oneapi/code_generator_graph_oneapi.py | 2 +- .../openai/code_generator_graph_openai.py | 2 +- extracted_data.py | 28 +++++++++++++++++++ scrapegraphai/nodes/fetch_node.py | 1 - scrapegraphai/utils/schema_trasform.py | 8 +++--- 19 files changed, 48 insertions(+), 21 deletions(-) create mode 100644 extracted_data.py diff --git a/examples/anthropic/code_generator_graph_anthropic.py b/examples/anthropic/code_generator_graph_anthropic.py index 49bd413d..c1a41ea3 100644 --- a/examples/anthropic/code_generator_graph_anthropic.py +++ b/examples/anthropic/code_generator_graph_anthropic.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/azure/code_generator_graph_azure.py b/examples/azure/code_generator_graph_azure.py index 79be4534..ad48933f 100644 --- a/examples/azure/code_generator_graph_azure.py +++ b/examples/azure/code_generator_graph_azure.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/bedrock/code_generator_graph_bedrock.py b/examples/bedrock/code_generator_graph_bedrock.py index 2998873b..7a0561fe 100644 --- a/examples/bedrock/code_generator_graph_bedrock.py +++ b/examples/bedrock/code_generator_graph_bedrock.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/deepseek/code_generator_graph_deepseek.py b/examples/deepseek/code_generator_graph_deepseek.py index 17b1a970..cc4670b7 100644 --- a/examples/deepseek/code_generator_graph_deepseek.py +++ b/examples/deepseek/code_generator_graph_deepseek.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/ernie/code_generator_graph_ernie.py b/examples/ernie/code_generator_graph_ernie.py index 1545238b..65b25b54 100644 --- a/examples/ernie/code_generator_graph_ernie.py +++ b/examples/ernie/code_generator_graph_ernie.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/fireworks/code_generator_graph_fireworks.py b/examples/fireworks/code_generator_graph_fireworks.py index 9bbec7f2..aa606b1e 100644 --- a/examples/fireworks/code_generator_graph_fireworks.py +++ b/examples/fireworks/code_generator_graph_fireworks.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/google_genai/code_generator_graph_gemini.py b/examples/google_genai/code_generator_graph_gemini.py index 4d16fdff..06b448cf 100644 --- a/examples/google_genai/code_generator_graph_gemini.py +++ b/examples/google_genai/code_generator_graph_gemini.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/google_vertexai/code_generator_graph_vertex.py b/examples/google_vertexai/code_generator_graph_vertex.py index 0d1399ea..28f40174 100644 --- a/examples/google_vertexai/code_generator_graph_vertex.py +++ b/examples/google_vertexai/code_generator_graph_vertex.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/groq/code_generator_graph_groq.py b/examples/groq/code_generator_graph_groq.py index 1f7d6b37..c78d7c29 100644 --- a/examples/groq/code_generator_graph_groq.py +++ b/examples/groq/code_generator_graph_groq.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/huggingfacehub/code_generator_graph_huggingfacehub.py b/examples/huggingfacehub/code_generator_graph_huggingfacehub.py index 085df3eb..4ff0d67e 100644 --- a/examples/huggingfacehub/code_generator_graph_huggingfacehub.py +++ b/examples/huggingfacehub/code_generator_graph_huggingfacehub.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph from langchain_community.llms import HuggingFaceEndpoint from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings diff --git a/examples/local_models/code_generator_graph_ollama.py b/examples/local_models/code_generator_graph_ollama.py index 9246e952..46ab8ab3 100644 --- a/examples/local_models/code_generator_graph_ollama.py +++ b/examples/local_models/code_generator_graph_ollama.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/mistral/code_generator_graph_mistral.py b/examples/mistral/code_generator_graph_mistral.py index 4abdf1f5..b9f7bdb9 100644 --- a/examples/mistral/code_generator_graph_mistral.py +++ b/examples/mistral/code_generator_graph_mistral.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/moonshot/code_generator_graph_moonshot.py b/examples/moonshot/code_generator_graph_moonshot.py index 11f9fb47..58e6182b 100644 --- a/examples/moonshot/code_generator_graph_moonshot.py +++ b/examples/moonshot/code_generator_graph_moonshot.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from langchain_community.chat_models.moonshot import MoonshotChat from scrapegraphai.graphs import CodeGeneratorGraph diff --git a/examples/nemotron/code_generator_graph_nemotron.py b/examples/nemotron/code_generator_graph_nemotron.py index 1f0ea276..c2ad8ab4 100644 --- a/examples/nemotron/code_generator_graph_nemotron.py +++ b/examples/nemotron/code_generator_graph_nemotron.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/oneapi/code_generator_graph_oneapi.py b/examples/oneapi/code_generator_graph_oneapi.py index 0bbb3ba2..aff40a3e 100644 --- a/examples/oneapi/code_generator_graph_oneapi.py +++ b/examples/oneapi/code_generator_graph_oneapi.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/examples/openai/code_generator_graph_openai.py b/examples/openai/code_generator_graph_openai.py index 21a4a02f..fd2b7ddb 100644 --- a/examples/openai/code_generator_graph_openai.py +++ b/examples/openai/code_generator_graph_openai.py @@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema import os, json from typing import List from dotenv import load_dotenv -from langchain_core.pydantic_v1 import BaseModel, Field +from pydantic import BaseModel, Field from scrapegraphai.graphs import CodeGeneratorGraph load_dotenv() diff --git a/extracted_data.py b/extracted_data.py new file mode 100644 index 00000000..45da5e49 --- /dev/null +++ b/extracted_data.py @@ -0,0 +1,28 @@ +def extract_data(html: str) -> dict: + from bs4 import BeautifulSoup + + # Parse the HTML content using BeautifulSoup + soup = BeautifulSoup(html, 'html.parser') + + # Initialize an empty list to hold project data + projects = [] + + # Find all project entries in the HTML + project_entries = soup.find_all('div', class_='grid-item') + + # Iterate over each project entry to extract title and description + for entry in project_entries: + # Extract the title from the card-title class + title = entry.find('h4', class_='card-title').get_text(strip=True) + + # Extract the description from the card-text class + description = entry.find('p', class_='card-text').get_text(strip=True) + + # Append the extracted data as a dictionary to the projects list + projects.append({ + 'title': title, + 'description': description + }) + + # Return the structured data as a dictionary matching the desired JSON schema + return {'projects': projects} \ No newline at end of file diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 7f1ce4eb..053a655b 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -319,4 +319,3 @@ class FetchNode(BaseNode): state["original_html"] = document state.update({self.output[0]: compressed_document,}) return state - diff --git a/scrapegraphai/utils/schema_trasform.py b/scrapegraphai/utils/schema_trasform.py index af752470..49e67ee0 100644 --- a/scrapegraphai/utils/schema_trasform.py +++ b/scrapegraphai/utils/schema_trasform.py @@ -12,7 +12,7 @@ def transform_schema(pydantic_schema): Returns: dict: The transformed JSON schema. """ - + def process_properties(properties): result = {} for key, value in properties.items(): @@ -20,7 +20,7 @@ def transform_schema(pydantic_schema): if value['type'] == 'array': if '$ref' in value['items']: ref_key = value['items']['$ref'].split('/')[-1] - result[key] = [process_properties(pydantic_schema['definitions'][ref_key]['properties'])] + result[key] = [process_properties(pydantic_schema['$defs'][ref_key]['properties'])] else: result[key] = [value['items']['type']] else: @@ -30,7 +30,7 @@ def transform_schema(pydantic_schema): } elif '$ref' in value: ref_key = value['$ref'].split('/')[-1] - result[key] = process_properties(pydantic_schema['definitions'][ref_key]['properties']) + result[key] = process_properties(pydantic_schema['$defs'][ref_key]['properties']) return result - return process_properties(pydantic_schema['properties']) \ No newline at end of file + return process_properties(pydantic_schema['properties'])