add sche,a example

This commit is contained in:
Marco Vinciguerra 2024-05-26 12:15:48 +02:00
parent 82962365b6
commit ecd98b2a45
7 changed files with 407 additions and 1 deletions

View File

@ -0,0 +1,77 @@
"""
Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
# required environment variables in .env
# HUGGINGFACEHUB_API_TOKEN
# ANTHROPIC_API_KEY
load_dotenv()
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
# ************************************************
# Initialize the model instances
# ************************************************
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
)
# ************************************************
# Define the output schema for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
graph_config = {
"llm": {
"api_key": os.getenv("ANTHROPIC_API_KEY"),
"model": "claude-3-haiku-20240307",
"max_tokens": 4000},
"embeddings": {"model_instance": embedder_model_instance}
}
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description",
# also accepts a string with the already downloaded HTML code
schema=schema,
source="https://perinim.github.io/projects/",
config=graph_config
)
result = smart_scraper_graph.run()
print(result)
# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,67 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ************************************************
# Define the output schema for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
# ************************************************
# Define the configuration for the graph
# ************************************************
openai_key = os.getenv("OPENAI_APIKEY")
graph_config = {
"llm": {
"api_key": openai_key,
"model": "gpt-4o",
},
"verbose": True,
"headless": False,
}
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
schema=schema,
config=graph_config
)
result = smart_scraper_graph.run()
print(result)
# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,68 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ************************************************
# Define the output schema for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
# ************************************************
# Define the configuration for the graph
# ************************************************
deepseek_key = os.getenv("DEEPSEEK_APIKEY")
graph_config = {
"llm": {
"model": "deepseek-chat",
"openai_api_key": deepseek_key,
"openai_api_base": 'https://api.deepseek.com/v1',
},
"verbose": True,
}
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description.",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
schema=schema,
config=graph_config
)
result = smart_scraper_graph.run()
print(result)
# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,64 @@
"""
Basic example of scraping pipeline using SmartScraper with schema
"""
import os
from dotenv import load_dotenv
from scrapegraphai.utils import prettify_exec_info
from scrapegraphai.graphs import SmartScraperGraph
load_dotenv()
# ************************************************
# Define the output schema for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
# ************************************************
# Define the configuration for the graph
# ************************************************
gemini_key = os.getenv("GOOGLE_APIKEY")
graph_config = {
"llm": {
"api_key": gemini_key,
"model": "gemini-pro",
},
}
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the news with their description.",
# also accepts a string with the already downloaded HTML code
source="https://www.wired.com",
schema=schema,
config=graph_config
)
result = smart_scraper_graph.run()
print(result)
# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,75 @@
"""
Basic example of scraping pipeline using SmartScraper with schema
"""
import os, json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ************************************************
# Define the output schema for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
# ************************************************
# Define the configuration for the graph
# ************************************************
groq_key = os.getenv("GROQ_APIKEY")
openai_key = os.getenv("OPENAI_APIKEY")
graph_config = {
"llm": {
"model": "groq/gemma-7b-it",
"api_key": groq_key,
"temperature": 0
},
"embeddings": {
"api_key": openai_key,
"model": "openai",
},
"headless": False
}
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description.",
# also accepts a string with the already downloaded HTML code
source="https://perinim.github.io/projects/",
schema=schema,
config=graph_config
)
result = smart_scraper_graph.run()
print(result)
# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,55 @@
"""
Basic example of scraping pipeline using SmartScraper with schema
"""
import json
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# ************************************************
# Define the configuration for the graph
# ************************************************
schema= """
{
"Projects": [
"Project #":
{
"title": "...",
"description": "...",
},
"Project #":
{
"title": "...",
"description": "...",
}
]
}
"""
graph_config = {
"llm": {
"model": "ollama/mistral",
"temperature": 0,
"format": "json", # Ollama needs the format to be specified explicitly
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
},
"embeddings": {
"model": "ollama/nomic-embed-text",
"temperature": 0,
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
},
"verbose": True,
"headless": False
}
# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
prompt="List me all the projects with their description",
source="https://perinim.github.io/projects/",
schema=schema,
config=graph_config
)
result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

View File

@ -1,5 +1,5 @@
"""
Basic example of scraping pipeline using SmartScraper
Basic example of scraping pipeline using SmartScraper with schema
"""
import os, json