mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-28 21:01:55 +08:00
feat: add api integration
This commit is contained in:
parent
92bb8bb168
commit
8aa9103f02
44
examples/scrapegraph-api/smart_scraper_api.py
Normal file
44
examples/scrapegraph-api/smart_scraper_api.py
Normal file
@ -0,0 +1,44 @@
|
||||
"""
|
||||
Basic example of a scraping pipeline using SmartScraperGraph with the ScrapeGraph API
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# Load environment variables (python-dotenv) before reading the API key.
load_dotenv()

# ************************************************
# Graph configuration: API-backed model plus runtime flags
# ************************************************

llm_settings = {
    "model": "scrapegraphai/smart-scraper",
    # Read from the environment; this is None when the variable is not set.
    "api_key": os.getenv("SCRAPEGRAPH_API_KEY"),
}
graph_config = {"llm": llm_settings, "verbose": True, "headless": False}

# ************************************************
# Build the SmartScraperGraph, run it and print the result as JSON
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    config=graph_config,
    source="https://www.wired.com",
    prompt="Extract me all the articles",
)
result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Print the graph execution info
# ************************************************

graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))
|
||||
@ -43,7 +43,8 @@ dependencies = [
|
||||
"transformers>=4.44.2",
|
||||
"googlesearch-python>=1.2.5",
|
||||
"simpleeval>=1.0.0",
|
||||
"async_timeout>=4.0.3"
|
||||
"async_timeout>=4.0.3",
|
||||
"scrapegraph-py>=0.0.3"
|
||||
]
|
||||
|
||||
license = "MIT"
|
||||
|
||||
@ -353,7 +353,7 @@ pyasn1==0.6.0
|
||||
# via rsa
|
||||
pyasn1-modules==0.4.0
|
||||
# via google-auth
|
||||
pydantic==2.8.2
|
||||
pydantic==2.10.1
|
||||
# via burr
|
||||
# via fastapi
|
||||
# via fastapi-pagination
|
||||
@ -368,7 +368,8 @@ pydantic==2.8.2
|
||||
# via openai
|
||||
# via pydantic-settings
|
||||
# via qdrant-client
|
||||
pydantic-core==2.20.1
|
||||
# via scrapegraph-py
|
||||
pydantic-core==2.27.1
|
||||
# via pydantic
|
||||
pydantic-settings==2.5.2
|
||||
# via langchain-community
|
||||
@ -396,6 +397,7 @@ python-dateutil==2.9.0.post0
|
||||
# via pandas
|
||||
python-dotenv==1.0.1
|
||||
# via pydantic-settings
|
||||
# via scrapegraph-py
|
||||
# via scrapegraphai
|
||||
pytz==2024.1
|
||||
# via pandas
|
||||
@ -424,6 +426,7 @@ requests==2.32.3
|
||||
# via langchain-community
|
||||
# via langsmith
|
||||
# via mistral-common
|
||||
# via scrapegraph-py
|
||||
# via sphinx
|
||||
# via streamlit
|
||||
# via tiktoken
|
||||
@ -439,6 +442,8 @@ s3transfer==0.10.2
|
||||
# via boto3
|
||||
safetensors==0.4.5
|
||||
# via transformers
|
||||
scrapegraph-py==0.0.3
|
||||
# via scrapegraphai
|
||||
semchunk==2.2.0
|
||||
# via scrapegraphai
|
||||
sentencepiece==0.2.0
|
||||
|
||||
@ -257,7 +257,7 @@ pyasn1==0.6.0
|
||||
# via rsa
|
||||
pyasn1-modules==0.4.0
|
||||
# via google-auth
|
||||
pydantic==2.8.2
|
||||
pydantic==2.10.1
|
||||
# via google-generativeai
|
||||
# via langchain
|
||||
# via langchain-aws
|
||||
@ -269,7 +269,8 @@ pydantic==2.8.2
|
||||
# via openai
|
||||
# via pydantic-settings
|
||||
# via qdrant-client
|
||||
pydantic-core==2.20.1
|
||||
# via scrapegraph-py
|
||||
pydantic-core==2.27.1
|
||||
# via pydantic
|
||||
pydantic-settings==2.5.2
|
||||
# via langchain-community
|
||||
@ -286,6 +287,7 @@ python-dateutil==2.9.0.post0
|
||||
# via pandas
|
||||
python-dotenv==1.0.1
|
||||
# via pydantic-settings
|
||||
# via scrapegraph-py
|
||||
# via scrapegraphai
|
||||
pytz==2024.1
|
||||
# via pandas
|
||||
@ -313,6 +315,7 @@ requests==2.32.3
|
||||
# via langchain-community
|
||||
# via langsmith
|
||||
# via mistral-common
|
||||
# via scrapegraph-py
|
||||
# via tiktoken
|
||||
# via transformers
|
||||
rpds-py==0.20.0
|
||||
@ -324,6 +327,8 @@ s3transfer==0.10.2
|
||||
# via boto3
|
||||
safetensors==0.4.5
|
||||
# via transformers
|
||||
scrapegraph-py==0.0.3
|
||||
# via scrapegraphai
|
||||
semchunk==2.2.0
|
||||
# via scrapegraphai
|
||||
sentencepiece==0.2.0
|
||||
|
||||
@ -13,6 +13,7 @@ from ..nodes import (
|
||||
ConditionalNode
|
||||
)
|
||||
from ..prompts import REGEN_ADDITIONAL_INFO
|
||||
from scrapegraph_py import ScrapeGraphClient, smart_scraper
|
||||
|
||||
class SmartScraperGraph(AbstractGraph):
|
||||
"""
|
||||
@ -59,6 +60,11 @@ class SmartScraperGraph(AbstractGraph):
|
||||
Returns:
|
||||
BaseGraph: A graph instance representing the web scraping workflow.
|
||||
"""
|
||||
if self.llm_model == "scrapegraphai/smart-scraper":
|
||||
client = ScrapeGraphClient(self.config.get("api_key"))
|
||||
|
||||
result = smart_scraper(client, self.source, self.prompt)
|
||||
return result
|
||||
|
||||
fetch_node = FetchNode(
|
||||
input="url| local_dir",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user