fix: update to pydantic documentation

This commit is contained in:
Marco Vinciguerra 2024-09-25 15:12:29 +02:00
parent d55f6bee47
commit 76ce257efb
19 changed files with 48 additions and 21 deletions

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
from langchain_community.llms import HuggingFaceEndpoint from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import json import json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from langchain_community.chat_models.moonshot import MoonshotChat from langchain_community.chat_models.moonshot import MoonshotChat
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

View File

@ -5,7 +5,7 @@ Basic example of scraping pipeline using Code Generator with schema
import os, json import os, json
from typing import List from typing import List
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_core.pydantic_v1 import BaseModel, Field from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv() load_dotenv()

28
extracted_data.py Normal file
View File

@ -0,0 +1,28 @@
def extract_data(html: str) -> dict:
    """Extract project titles and descriptions from an HTML page.

    Every ``<div class="grid-item">`` is treated as one project entry. The
    title is read from the ``<h4 class="card-title">`` inside it and the
    description from the ``<p class="card-text">`` inside it.

    Entries without a title element are skipped; entries with a title but
    no description element get an empty-string description. This keeps a
    single malformed card from aborting the whole extraction.

    Args:
        html: Raw HTML markup to parse.

    Returns:
        A dict of the form
        ``{'projects': [{'title': str, 'description': str}, ...]}``,
        with projects listed in document order.
    """
    # Local import keeps this single-function module self-contained.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'html.parser')

    projects = []
    for entry in soup.find_all('div', class_='grid-item'):
        # find() returns None when nothing matches, so check before
        # calling get_text() instead of raising AttributeError.
        title_tag = entry.find('h4', class_='card-title')
        if title_tag is None:
            continue

        description_tag = entry.find('p', class_='card-text')
        if description_tag is None:
            description = ''
        else:
            description = description_tag.get_text(strip=True)

        projects.append({
            'title': title_tag.get_text(strip=True),
            'description': description,
        })

    # Shape matches the desired JSON schema: a top-level 'projects' list.
    return {'projects': projects}

View File

@ -319,4 +319,3 @@ class FetchNode(BaseNode):
state["original_html"] = document state["original_html"] = document
state.update({self.output[0]: compressed_document,}) state.update({self.output[0]: compressed_document,})
return state return state

View File

@ -12,7 +12,7 @@ def transform_schema(pydantic_schema):
Returns: Returns:
dict: The transformed JSON schema. dict: The transformed JSON schema.
""" """
def process_properties(properties): def process_properties(properties):
result = {} result = {}
for key, value in properties.items(): for key, value in properties.items():
@ -20,7 +20,7 @@ def transform_schema(pydantic_schema):
if value['type'] == 'array': if value['type'] == 'array':
if '$ref' in value['items']: if '$ref' in value['items']:
ref_key = value['items']['$ref'].split('/')[-1] ref_key = value['items']['$ref'].split('/')[-1]
result[key] = [process_properties(pydantic_schema['definitions'][ref_key]['properties'])] result[key] = [process_properties(pydantic_schema['$defs'][ref_key]['properties'])]
else: else:
result[key] = [value['items']['type']] result[key] = [value['items']['type']]
else: else:
@ -30,7 +30,7 @@ def transform_schema(pydantic_schema):
} }
elif '$ref' in value: elif '$ref' in value:
ref_key = value['$ref'].split('/')[-1] ref_key = value['$ref'].split('/')[-1]
result[key] = process_properties(pydantic_schema['definitions'][ref_key]['properties']) result[key] = process_properties(pydantic_schema['$defs'][ref_key]['properties'])
return result return result
return process_properties(pydantic_schema['properties']) return process_properties(pydantic_schema['properties'])