mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-01 21:00:48 +08:00
feat(AbstractGraph): add adjustable rate limit
This commit is contained in:
parent
81af62d35f
commit
2859fb72d6
48
examples/anthropic/rate_limit_haiku.py
Normal file
48
examples/anthropic/rate_limit_haiku.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper while setting an API rate limit.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
|
||||||
|
# required environment variables in .env
|
||||||
|
# ANTHROPIC_API_KEY
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
||||||
|
"model": "anthropic/claude-3-haiku-20240307",
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="""Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
|
||||||
|
event_end_date, event_end_time, location, event_mode, event_category,
|
||||||
|
third_party_redirect, no_of_days,
|
||||||
|
time_in_hours, hosted_or_attending, refreshments_type,
|
||||||
|
registration_available, registration_link""",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://www.hmhco.com/event",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
57
examples/azure/rate_limit_azure.py
Normal file
57
examples/azure/rate_limit_azure.py
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
|
||||||
|
# required environment variables in .env
|
||||||
|
# AZURE_OPENAI_ENDPOINT
|
||||||
|
# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
|
||||||
|
# MODEL_NAME
|
||||||
|
# AZURE_OPENAI_KEY
|
||||||
|
# OPENAI_API_TYPE
|
||||||
|
# AZURE_OPENAI_API_VERSION
|
||||||
|
# AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||||
|
"model": "azure_openai/gpt-3.5-turbo",
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
"headless": False
|
||||||
|
}
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="""List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
|
||||||
|
event_end_date, event_end_time, location, event_mode, event_category,
|
||||||
|
third_party_redirect, no_of_days,
|
||||||
|
time_in_hours, hosted_or_attending, refreshments_type,
|
||||||
|
registration_available, registration_link""",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://www.hmhco.com/event",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
47
examples/bedrock/rate_limit_bedrock.py
Normal file
47
examples/bedrock/rate_limit_bedrock.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"client": "client_name",
|
||||||
|
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||||
|
"temperature": 0.0,
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the projects with their description",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://perinim.github.io/projects/",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
49
examples/deepseek/rate_limit_deepseek.py
Normal file
49
examples/deepseek/rate_limit_deepseek.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
deepseek_key = os.getenv("DEEPSEEK_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"model": "deepseek/deepseek-chat",
|
||||||
|
"api_key": deepseek_key,
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the projects with their description.",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://perinim.github.io/projects/",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
49
examples/ernie/rate_limit_ernie.py
Normal file
49
examples/ernie/rate_limit_ernie.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"model": "ernie/ernie-bot-turbo",
|
||||||
|
"ernie_client_id": "<ernie_client_id>",
|
||||||
|
"ernie_client_secret": "<ernie_client_secret>",
|
||||||
|
"temperature": 0.1,
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"library": "beautifulsoup"
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the projects with their description",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://perinim.github.io/projects/",
|
||||||
|
config=graph_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
50
examples/fireworks/rate_limit_fireworks.py
Normal file
50
examples/fireworks/rate_limit_fireworks.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os, json
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": fireworks_api_key,
|
||||||
|
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct",
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
"headless": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the projects with their description",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://perinim.github.io/projects/",
|
||||||
|
config=graph_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(json.dumps(result, indent=4))
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
47
examples/google_genai/rate_limit_gemini.py
Normal file
47
examples/google_genai/rate_limit_gemini.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
gemini_key = os.getenv("GOOGLE_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": gemini_key,
|
||||||
|
"model": "google_genai/gemini-pro",
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the news with their description.",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://www.wired.com",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
47
examples/google_vertexai/rate_limit_gemini.py
Normal file
47
examples/google_vertexai/rate_limit_gemini.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
gemini_key = os.getenv("GOOGLE_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": gemini_key,
|
||||||
|
"model": "google_vertexai/gemini-1.5-pro",
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the news with their description.",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://www.wired.com",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
49
examples/groq/rate_limit_groq.py
Normal file
49
examples/groq/rate_limit_groq.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
groq_key = os.getenv("GROQ_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"model": "groq/gemma-7b-it",
|
||||||
|
"api_key": groq_key,
|
||||||
|
"temperature": 0,
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"headless": False
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the projects with their description.",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://perinim.github.io/projects/",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
46
examples/mistral/rate_limit_mistral.py
Normal file
46
examples/mistral/rate_limit_mistral.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os, json
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": os.getenv("MISTRAL_API_KEY"),
|
||||||
|
"model": "mistralai/open-mistral-nemo",
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
"headless": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me what does the company do, the name and a contact email.",
|
||||||
|
source="https://scrapegraphai.com/",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(json.dumps(result, indent=4))
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
46
examples/nemotron/rate_limit_nemotron.py
Normal file
46
examples/nemotron/rate_limit_nemotron.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os, json
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": os.getenv("NEMOTRON_KEY"),
|
||||||
|
"model": "nvidia/meta/llama3-70b-instruct",
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
"headless": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="Extract me the python code inside the page",
|
||||||
|
source="https://www.exploit-db.com/exploits/51447",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(json.dumps(result, indent=4))
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
41
examples/oneapi/rate_limit_oneapi.py
Normal file
41
examples/oneapi/rate_limit_oneapi.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": "***************************",
|
||||||
|
"model": "oneapi/qwen-turbo",
|
||||||
|
"base_url": "http://127.0.0.1:3000/v1", # set the OneAPI URL
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the titles",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://www.wired.com/",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
48
examples/openai/rate_limit_openai.py
Normal file
48
examples/openai/rate_limit_openai.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||||
|
"model": "openai/gpt-4o",
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
"headless": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me what does the company do, the name and a contact email.",
|
||||||
|
source="https://scrapegraphai.com/",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(json.dumps(result, indent=4))
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
49
examples/together/rate_limit_together.py
Normal file
49
examples/together/rate_limit_together.py
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
from scrapegraphai.utils import prettify_exec_info
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
together_key = os.getenv("TOGETHER_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
|
||||||
|
"api_key": together_key,
|
||||||
|
"rate_limit": {
|
||||||
|
"requests_per_second": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the projects with their description.",
|
||||||
|
# also accepts a string with the already downloaded HTML code
|
||||||
|
source="https://perinim.github.io/projects/",
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(result)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Get graph execution info
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||||
|
print(prettify_exec_info(graph_exec_info))
|
||||||
@ -8,6 +8,7 @@ import uuid
|
|||||||
import warnings
|
import warnings
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from langchain.chat_models import init_chat_model
|
from langchain.chat_models import init_chat_model
|
||||||
|
from langchain_core.rate_limiters import InMemoryRateLimiter
|
||||||
from ..helpers import models_tokens
|
from ..helpers import models_tokens
|
||||||
from ..models import (
|
from ..models import (
|
||||||
OneApi,
|
OneApi,
|
||||||
@ -119,6 +120,17 @@ class AbstractGraph(ABC):
|
|||||||
|
|
||||||
llm_defaults = {"temperature": 0, "streaming": False}
|
llm_defaults = {"temperature": 0, "streaming": False}
|
||||||
llm_params = {**llm_defaults, **llm_config}
|
llm_params = {**llm_defaults, **llm_config}
|
||||||
|
rate_limit_params = llm_params.pop("rate_limit", {})
|
||||||
|
|
||||||
|
if rate_limit_params:
|
||||||
|
requests_per_second = rate_limit_params.get("requests_per_second")
|
||||||
|
max_retries = rate_limit_params.get("max_retries")
|
||||||
|
if requests_per_second is not None:
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
warnings.simplefilter("ignore")
|
||||||
|
llm_params["rate_limiter"] = InMemoryRateLimiter(requests_per_second=requests_per_second)
|
||||||
|
if max_retries is not None:
|
||||||
|
llm_params["max_retries"] = max_retries
|
||||||
|
|
||||||
if "model_instance" in llm_params:
|
if "model_instance" in llm_params:
|
||||||
try:
|
try:
|
||||||
|
|||||||
@ -81,3 +81,17 @@ class TestAbstractGraph:
|
|||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
TestGraph("Test prompt", {"llm": {"model": "unknown_provider/model"}})
|
TestGraph("Test prompt", {"llm": {"model": "unknown_provider/model"}})
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("llm_config, expected_model", [
|
||||||
|
({"model": "openai/gpt-3.5-turbo", "openai_api_key": "sk-randomtest001", "rate_limit": {"requests_per_second": 1}}, ChatOpenAI),
|
||||||
|
({"model": "azure_openai/gpt-3.5-turbo", "api_key": "random-api-key", "api_version": "no version", "azure_endpoint": "https://www.example.com/", "rate_limit": {"requests_per_second": 1}}, AzureChatOpenAI),
|
||||||
|
({"model": "google_genai/gemini-pro", "google_api_key": "google-key-test", "rate_limit": {"requests_per_second": 1}}, ChatGoogleGenerativeAI),
|
||||||
|
({"model": "ollama/llama2", "rate_limit": {"requests_per_second": 1}}, ChatOllama),
|
||||||
|
({"model": "oneapi/qwen-turbo", "api_key": "oneapi-api-key", "rate_limit": {"requests_per_second": 1}}, OneApi),
|
||||||
|
({"model": "deepseek/deepseek-coder", "api_key": "deepseek-api-key", "rate_limit": {"requests_per_second": 1}}, DeepSeek),
|
||||||
|
({"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0", "region_name": "IDK", "rate_limit": {"requests_per_second": 1}}, ChatBedrock),
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_llm_with_rate_limit(self, llm_config, expected_model):
|
||||||
|
graph = TestGraph("Test prompt", {"llm": llm_config})
|
||||||
|
assert isinstance(graph.llm_model, expected_model)
|
||||||
Loading…
Reference in New Issue
Block a user