mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
add schema example
This commit is contained in:
parent
82962365b6
commit
ecd98b2a45
77
examples/anthropic/smart_scraper_schema_haiku.py
Normal file
77
examples/anthropic/smart_scraper_schema_haiku.py
Normal file
@ -0,0 +1,77 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with schema, using an Anthropic API key (Claude 3 Haiku)
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
from langchain_community.llms import HuggingFaceEndpoint
|
||||
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
|
||||
|
||||
|
||||
# required environment variables in .env
|
||||
# HUGGINGFACEHUB_API_TOKEN
|
||||
# ANTHROPIC_API_KEY
|
||||
load_dotenv()
|
||||
|
||||
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
||||
# ************************************************
|
||||
# Initialize the model instances
|
||||
# ************************************************
|
||||
|
||||
|
||||
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
|
||||
api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
||||
"model": "claude-3-haiku-20240307",
|
||||
"max_tokens": 4000},
|
||||
"embeddings": {"model_instance": embedder_model_instance}
|
||||
}
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
schema=schema,
|
||||
source="https://perinim.github.io/projects/",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
67
examples/bedrock/smart_scraper_schema_bedrock.py
Normal file
67
examples/bedrock/smart_scraper_schema_bedrock.py
Normal file
@ -0,0 +1,67 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-4o",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=schema,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
68
examples/deepseek/smart_scraper_schema_deepseek.py
Normal file
68
examples/deepseek/smart_scraper_schema_deepseek.py
Normal file
@ -0,0 +1,68 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
deepseek_key = os.getenv("DEEPSEEK_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "deepseek-chat",
|
||||
"openai_api_key": deepseek_key,
|
||||
"openai_api_base": 'https://api.deepseek.com/v1',
|
||||
},
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=schema,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
64
examples/gemini/smart_scraper_schema_gemini.py
Normal file
64
examples/gemini/smart_scraper_schema_gemini.py
Normal file
@ -0,0 +1,64 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with schema
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
gemini_key = os.getenv("GOOGLE_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": gemini_key,
|
||||
"model": "gemini-pro",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the news with their description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://www.wired.com",
|
||||
schema=schema,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
75
examples/groq/smart_scraper_schema_groq_openai.py
Normal file
75
examples/groq/smart_scraper_schema_groq_openai.py
Normal file
@ -0,0 +1,75 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with schema
|
||||
"""
|
||||
|
||||
import os, json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
groq_key = os.getenv("GROQ_APIKEY")
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "groq/gemma-7b-it",
|
||||
"api_key": groq_key,
|
||||
"temperature": 0
|
||||
},
|
||||
"embeddings": {
|
||||
"api_key": openai_key,
|
||||
"model": "openai",
|
||||
},
|
||||
"headless": False
|
||||
}
|
||||
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=schema,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
55
examples/local_models/smart_scraper_schema_ollama.py
Normal file
55
examples/local_models/smart_scraper_schema_ollama.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with schema
|
||||
"""
|
||||
import json
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
# ************************************************
|
||||
# Define the output schema and the configuration for the graph
|
||||
# ************************************************
|
||||
schema= """
|
||||
{
|
||||
"Projects": [
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
},
|
||||
"Project #":
|
||||
{
|
||||
"title": "...",
|
||||
"description": "...",
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/mistral",
|
||||
"temperature": 0,
|
||||
"format": "json", # Ollama needs the format to be specified explicitly
|
||||
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=schema,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
@ -1,5 +1,5 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
Basic example of scraping pipeline using SmartScraper with schema
|
||||
"""
|
||||
|
||||
import os, json
|
||||
|
||||
Loading…
Reference in New Issue
Block a user