mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-04 21:00:36 +08:00
fix: revert
This commit is contained in:
parent
bb5de581c0
commit
b312251cc5
41
README.md
41
README.md
@ -24,21 +24,9 @@ Just say which information you want to extract and the library will do it for yo
|
||||
<img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/sgai-hero.png" alt="ScrapeGraphAI Hero" style="width: 100%;">
|
||||
</p>
|
||||
|
||||
## 🔗 ScrapeGraph API & SDKs
|
||||
If you are looking for a quick solution to integrate ScrapeGraph in your system, check out our powerful API [here!](https://dashboard.scrapegraphai.com/login)
|
||||
## News 📰
|
||||
|
||||
<p align="center">
|
||||
<img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/api-banner.png" alt="ScrapeGraph API Banner" style="width: 100%;">
|
||||
</p>
|
||||
|
||||
We offer SDKs in both Python and Node.js, making it easy to integrate into your projects. Check them out below:
|
||||
|
||||
| SDK | Language | GitHub Link |
|
||||
|-----------|----------|-----------------------------------------------------------------------------|
|
||||
| Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) |
|
||||
| Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) |
|
||||
|
||||
The Official API Documentation can be found [here](https://docs.scrapegraphai.com/).
|
||||
- ScrapegraphAI now has its own APIs! Check them out [here](https://scrapegraphai.com)!
|
||||
|
||||
## 🚀 Quick install
|
||||
|
||||
@ -99,8 +87,8 @@ graph_config = {
|
||||
|
||||
# Create the SmartScraperGraph instance
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="Extract me all the news from the website",
|
||||
source="https://www.wired.com",
|
||||
prompt="Find some information about what does the company do, the name and a contact email.",
|
||||
source="https://scrapegraphai.com/",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
@ -112,20 +100,10 @@ print(json.dumps(result, indent=4))
|
||||
The output will be a dictionary like the following:
|
||||
|
||||
```python
|
||||
"result": {
|
||||
"news": [
|
||||
{
|
||||
"title": "The New Jersey Drone Mystery May Not Actually Be That Mysterious",
|
||||
"link": "https://www.wired.com/story/new-jersey-drone-mystery-maybe-not-drones/",
|
||||
"author": "Lily Hay Newman"
|
||||
},
|
||||
{
|
||||
"title": "Former ByteDance Intern Accused of Sabotage Among Winners of Prestigious AI Award",
|
||||
"link": "https://www.wired.com/story/bytedance-intern-best-paper-neurips/",
|
||||
"author": "Louise Matsakis"
|
||||
},
|
||||
...
|
||||
]
|
||||
{
|
||||
"company": "ScrapeGraphAI",
|
||||
"name": "ScrapeGraphAI Extracting content from websites and local documents using LLM",
|
||||
"contact_email": "contact@scrapegraphai.com"
|
||||
}
|
||||
```
|
||||
There are other pipelines that can be used to extract information from multiple pages, generate Python scripts, or even generate audio files.
|
||||
@ -157,7 +135,8 @@ Try it directly on the web using Google Colab:
|
||||
## 📖 Documentation
|
||||
|
||||
The documentation for ScrapeGraphAI can be found [here](https://scrapegraph-ai.readthedocs.io/en/latest/).
|
||||
Also check out the Docusaurus documentation [here](https://docs-oss.scrapegraphai.com/).
|
||||
|
||||
Also check out the Docusaurus documentation [here](https://scrapegraph-doc.onrender.com/).
|
||||
|
||||
## 🏆 Sponsors
|
||||
<div style="text-align: center;">
|
||||
|
||||
1
examples/anthropic/.env.example
Normal file
1
examples/anthropic/.env.example
Normal file
@ -0,0 +1 @@
|
||||
ANTHROPIC_API_KEY="YOUR ANTHROPIC API KEY"
|
||||
59
examples/anthropic/code_generator_graph_anthropic.py
Normal file
59
examples/anthropic/code_generator_graph_anthropic.py
Normal file
@ -0,0 +1,59 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using Code Generator with schema
|
||||
"""
|
||||
import os, json
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import CodeGeneratorGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Project(BaseModel):
|
||||
title: str = Field(description="The title of the project")
|
||||
description: str = Field(description="The description of the project")
|
||||
|
||||
class Projects(BaseModel):
|
||||
projects: List[Project]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
anthropic_key = os.getenv("ANTHROPIC_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key":anthropic_key,
|
||||
"model": "anthropic/claude-3-haiku-20240307",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
"reduction": 2,
|
||||
"max_iterations": {
|
||||
"overall": 10,
|
||||
"syntax": 3,
|
||||
"execution": 3,
|
||||
"validation": 3,
|
||||
"semantic": 3
|
||||
},
|
||||
"output_file_name": "extracted_data.py"
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the CodeGeneratorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
code_generator_graph = CodeGeneratorGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=Projects,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = code_generator_graph.run()
|
||||
print(result)
|
||||
60
examples/anthropic/csv_scraper_anthropic.py
Normal file
60
examples/anthropic/csv_scraper_anthropic.py
Normal file
@ -0,0 +1,60 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the CSV file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/username.csv"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
text = pd.read_csv(file_path)
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
# required environment variables in .env
|
||||
# HUGGINGFACEHUB_API_TOKEN
|
||||
# ANTHROPIC_API_KEY
|
||||
load_dotenv()
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
||||
"model": "anthropic/claude-3-haiku-20240307",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the CSVScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
csv_scraper_graph = CSVScraperGraph(
|
||||
prompt="List me all the last names",
|
||||
source=str(text), # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = csv_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = csv_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
54
examples/anthropic/csv_scraper_graph_multi_anthropic.py
Normal file
54
examples/anthropic/csv_scraper_graph_multi_anthropic.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
# ************************************************
|
||||
# Read the CSV file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/username.csv"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
text = pd.read_csv(file_path)
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
||||
"model": "anthropic/claude-3-haiku-20240307",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the CSVScraperMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
csv_scraper_graph = CSVScraperMultiGraph(
|
||||
prompt="List me all the last names",
|
||||
source=[str(text), str(text)],
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = csv_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = csv_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
94
examples/anthropic/custom_graph_anthropic.py
Normal file
94
examples/anthropic/custom_graph_anthropic.py
Normal file
@ -0,0 +1,94 @@
|
||||
"""
|
||||
Example of custom graph using existing nodes
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
from scrapegraphai.graphs import BaseGraph
|
||||
from scrapegraphai.nodes import FetchNode, ParseNode, GenerateAnswerNode, RobotsNode
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
||||
"model": "claude-3-haiku-20240307",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Define the graph nodes
|
||||
# ************************************************
|
||||
|
||||
# ChatAnthropic is a pydantic model that accepts keyword arguments only, so the
# config dict must be unpacked rather than passed as one positional argument.
llm_model = ChatAnthropic(**graph_config["llm"])
|
||||
|
||||
# define the nodes for the graph
|
||||
robot_node = RobotsNode(
|
||||
input="url",
|
||||
output=["is_scrapable"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"force_scraping": True,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
fetch_node = FetchNode(
|
||||
input="url | local_dir",
|
||||
output=["doc"],
|
||||
node_config={
|
||||
"verbose": True,
|
||||
"headless": True,
|
||||
}
|
||||
)
|
||||
parse_node = ParseNode(
|
||||
input="doc",
|
||||
output=["parsed_doc"],
|
||||
node_config={
|
||||
"chunk_size": 4096,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
generate_answer_node = GenerateAnswerNode(
|
||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||
output=["answer"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Create the graph by defining the connections
|
||||
# ************************************************
|
||||
|
||||
graph = BaseGraph(
|
||||
nodes=[
|
||||
robot_node,
|
||||
fetch_node,
|
||||
parse_node,
|
||||
generate_answer_node,
|
||||
],
|
||||
edges=[
|
||||
(robot_node, fetch_node),
|
||||
(fetch_node, parse_node),
|
||||
(parse_node, generate_answer_node)
|
||||
],
|
||||
entry_point=robot_node
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Execute the graph
|
||||
# ************************************************
|
||||
|
||||
result, execution_info = graph.execute({
|
||||
"user_prompt": "Describe the content",
|
||||
"url": "https://example.com/"
|
||||
})
|
||||
|
||||
# get the answer from the result
|
||||
result = result.get("answer", "No answer found.")
|
||||
print(result)
|
||||
28
examples/anthropic/depth_search_graph_anthropic.py
Normal file
28
examples/anthropic/depth_search_graph_anthropic.py
Normal file
@ -0,0 +1,28 @@
|
||||
"""
|
||||
depth_search_graph_anthropic example
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import DepthSearchGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
||||
"model": "anthropic/claude-3-haiku-20240307",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
"depth": 2,
|
||||
"only_inside_links": False,
|
||||
}
|
||||
|
||||
search_graph = DepthSearchGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
42
examples/anthropic/document_scraper_anthropic.py
Normal file
42
examples/anthropic/document_scraper_anthropic.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""
|
||||
document_scraper example
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import DocumentScraperGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.getenv("ANTHROPIC_API_KEY"),
|
||||
"model": "anthropic/claude-3-haiku-20240307",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
source = """
|
||||
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
|
||||
circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature.
|
||||
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
|
||||
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
|
||||
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
|
||||
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
|
||||
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
|
||||
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
|
||||
"""
|
||||
|
||||
pdf_scraper_graph = DocumentScraperGraph(
|
||||
prompt="Summarize the text and find the main topics",
|
||||
source=source,
|
||||
config=graph_config,
|
||||
)
|
||||
result = pdf_scraper_graph.run()
|
||||
|
||||
print(json.dumps(result, indent=4))
|
||||
120
examples/anthropic/inputs/books.xml
Normal file
120
examples/anthropic/inputs/books.xml
Normal file
@ -0,0 +1,120 @@
|
||||
<?xml version="1.0"?>
|
||||
<catalog>
|
||||
<book id="bk101">
|
||||
<author>Gambardella, Matthew</author>
|
||||
<title>XML Developer's Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>44.95</price>
|
||||
<publish_date>2000-10-01</publish_date>
|
||||
<description>An in-depth look at creating applications
|
||||
with XML.</description>
|
||||
</book>
|
||||
<book id="bk102">
|
||||
<author>Ralls, Kim</author>
|
||||
<title>Midnight Rain</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-12-16</publish_date>
|
||||
<description>A former architect battles corporate zombies,
|
||||
an evil sorceress, and her own childhood to become queen
|
||||
of the world.</description>
|
||||
</book>
|
||||
<book id="bk103">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Maeve Ascendant</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-11-17</publish_date>
|
||||
<description>After the collapse of a nanotechnology
|
||||
society in England, the young survivors lay the
|
||||
foundation for a new society.</description>
|
||||
</book>
|
||||
<book id="bk104">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Oberon's Legacy</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-03-10</publish_date>
|
||||
<description>In post-apocalypse England, the mysterious
|
||||
agent known only as Oberon helps to create a new life
|
||||
for the inhabitants of London. Sequel to Maeve
|
||||
Ascendant.</description>
|
||||
</book>
|
||||
<book id="bk105">
|
||||
<author>Corets, Eva</author>
|
||||
<title>The Sundered Grail</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-09-10</publish_date>
|
||||
<description>The two daughters of Maeve, half-sisters,
|
||||
battle one another for control of England. Sequel to
|
||||
Oberon's Legacy.</description>
|
||||
</book>
|
||||
<book id="bk106">
|
||||
<author>Randall, Cynthia</author>
|
||||
<title>Lover Birds</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-09-02</publish_date>
|
||||
<description>When Carla meets Paul at an ornithology
|
||||
conference, tempers fly as feathers get ruffled.</description>
|
||||
</book>
|
||||
<book id="bk107">
|
||||
<author>Thurman, Paula</author>
|
||||
<title>Splish Splash</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>A deep sea diver finds true love twenty
|
||||
thousand leagues beneath the sea.</description>
|
||||
</book>
|
||||
<book id="bk108">
|
||||
<author>Knorr, Stefan</author>
|
||||
<title>Creepy Crawlies</title>
|
||||
<genre>Horror</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-12-06</publish_date>
|
||||
<description>An anthology of horror stories about roaches,
|
||||
centipedes, scorpions and other insects.</description>
|
||||
</book>
|
||||
<book id="bk109">
|
||||
<author>Kress, Peter</author>
|
||||
<title>Paradox Lost</title>
|
||||
<genre>Science Fiction</genre>
|
||||
<price>6.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>After an inadvertent trip through a Heisenberg
|
||||
Uncertainty Device, James Salway discovers the problems
|
||||
of being quantum.</description>
|
||||
</book>
|
||||
<book id="bk110">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>Microsoft .NET: The Programming Bible</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-09</publish_date>
|
||||
<description>Microsoft's .NET initiative is explored in
|
||||
detail in this deep programmer's reference.</description>
|
||||
</book>
|
||||
<book id="bk111">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>MSXML3: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-01</publish_date>
|
||||
<description>The Microsoft MSXML3 parser is covered in
|
||||
detail, with attention to XML DOM interfaces, XSLT processing,
|
||||
SAX and more.</description>
|
||||
</book>
|
||||
<book id="bk112">
|
||||
<author>Galos, Mike</author>
|
||||
<title>Visual Studio 7: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>49.95</price>
|
||||
<publish_date>2001-04-16</publish_date>
|
||||
<description>Microsoft Visual Studio 7 is explored in depth,
|
||||
looking at how Visual Basic, Visual C++, C#, and ASP+ are
|
||||
integrated into a comprehensive development
|
||||
environment.</description>
|
||||
</book>
|
||||
</catalog>
|
||||
182
examples/anthropic/inputs/example.json
Normal file
182
examples/anthropic/inputs/example.json
Normal file
@ -0,0 +1,182 @@
|
||||
{
|
||||
"kind":"youtube#searchListResponse",
|
||||
"etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg",
|
||||
"nextPageToken":"CAUQAA",
|
||||
"regionCode":"NL",
|
||||
"pageInfo":{
|
||||
"totalResults":1000000,
|
||||
"resultsPerPage":5
|
||||
},
|
||||
"items":[
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"TvWDY4Mm5GM"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T14:15:01Z",
|
||||
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
|
||||
"title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts",
|
||||
"description":"",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"FC Motivate",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T14:15:01Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"aZM_42CcNZ4"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T16:09:27Z",
|
||||
"channelId":"UCM5gMM_HqfKHYIEJ3lstMUA",
|
||||
"title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 💰",
|
||||
"description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"John Nellis",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T16:09:27Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"WbBz4oh9I5VaYj91LjeJvffrBVY",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"wkP3XS3aNAY"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T16:00:50Z",
|
||||
"channelId":"UC4EP1dxFDPup_aFLt0ElsDw",
|
||||
"title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL",
|
||||
"description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"Shoot for Love",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T16:00:50Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"juxv_FhT_l4qrR05S1QTrb4CGh8",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"rJkDZ0WvfT8"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T10:00:39Z",
|
||||
"channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ",
|
||||
"title":"TOP 10 DEFENDERS 2023",
|
||||
"description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! • Instagram ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"Home of Football",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T10:00:39Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"wtuknXTmI1txoULeH3aWaOuXOow",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"XH0rtu4U6SE"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-21T16:30:05Z",
|
||||
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
|
||||
"title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts",
|
||||
"description":"",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"FC Motivate",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-21T16:30:05Z"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
105
examples/anthropic/inputs/plain_html_example.txt
Normal file
105
examples/anthropic/inputs/plain_html_example.txt
Normal file
@ -0,0 +1,105 @@
|
||||
<body class="fixed-top-nav " style="padding-top: 57px;">
|
||||
<header>
|
||||
<nav id="navbar" class="navbar navbar-light navbar-expand-sm fixed-top">
|
||||
<div class="container">
|
||||
<a class="navbar-brand title font-weight-lighter" href="/"><span class="font-weight-bold">Marco </span>Perini</a> <button class="navbar-toggler collapsed ml-auto" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar top-bar"></span> <span class="icon-bar middle-bar"></span> <span class="icon-bar bottom-bar"></span> </button>
|
||||
<div class="collapse navbar-collapse text-right" id="navbarNav">
|
||||
<ul class="navbar-nav ml-auto flex-nowrap">
|
||||
<li class="nav-item "> <a class="nav-link" href="/">About</a> </li>
|
||||
<li class="nav-item dropdown active">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Projects<span class="sr-only">(current)</span></a>
|
||||
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="/projects/">Projects</a>
|
||||
<div class="dropdown-divider"></div>
|
||||
<a class="dropdown-item" href="/competitions/">Competitions</a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="nav-item "> <a class="nav-link" href="/cv/">CV</a> </li>
|
||||
<li class="toggle-container"> <button id="light-toggle" title="Change theme"> <i class="fa-solid fa-moon"></i> <i class="fa-solid fa-sun"></i> </button> </li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<progress id="progress" value="0" max="284" style="top: 57px;">
|
||||
<div class="progress-container"> <span class="progress-bar"></span> </div>
|
||||
</progress>
|
||||
</header>
|
||||
<div class="container mt-5">
|
||||
<div class="post">
|
||||
<header class="post-header">
|
||||
<h1 class="post-title">Projects</h1>
|
||||
<p class="post-description"></p>
|
||||
</header>
|
||||
<article>
|
||||
<div class="projects">
|
||||
<div class="grid" style="position: relative; height: 861.992px;">
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 0px;">
|
||||
<a href="/projects/rotary-pendulum-rl/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/rotary_pybullet.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Rotary Pendulum RL</h4>
|
||||
<p class="card-text">Open Source project aimed at controlling a real life rotary pendulum using RL algorithms</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 0px;">
|
||||
<a href="https://github.com/PeriniM/DQN-SwingUp" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/value-policy-heatmaps.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">DQN Implementation from scratch</h4>
|
||||
<p class="card-text">Developed a Deep Q-Network algorithm to train a simple and double pendulum</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 447.414px;">
|
||||
<a href="https://github.com/PeriniM/Multi-Agents-HAED" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/multi_agents_haed.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Multi Agents HAED</h4>
|
||||
<p class="card-text">University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 370.172px;">
|
||||
<a href="/projects/wireless-esc-drone/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/wireless_esc.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Wireless ESC for Modular Drones</h4>
|
||||
<p class="card-text">Modular drone architecture proposal and proof of concept. The project received maximum grade.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
<footer class="fixed-bottom">
|
||||
<div class="container mt-0"> © Copyright 2023 Marco Perini. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div>
|
||||
</footer>
|
||||
<div class="hiddendiv common"></div>
|
||||
</body>
|
||||
7
examples/anthropic/inputs/username.csv
Normal file
7
examples/anthropic/inputs/username.csv
Normal file
@ -0,0 +1,7 @@
|
||||
Username; Identifier;First name;Last name
|
||||
booker12;9012;Rachel;Booker
|
||||
grey07;2070;Laura;Grey
|
||||
johnson81;4081;Craig;Johnson
|
||||
jenkins46;9346;Mary;Jenkins
|
||||
smith79;5079;Jamie;Smith
|
||||
|
||||
|
43
examples/anthropic/json_scraper_anthropic.py
Normal file
43
examples/anthropic/json_scraper_anthropic.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using JSONScraperGraph from JSON documents
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperGraph
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Read the JSON document stored next to this script
# ------------------------------------------------

FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as json_file:
    json_text = json_file.read()

# ------------------------------------------------
# Graph configuration: which LLM to use and its key
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Build the JSONScraperGraph, run it and show the answer
# ------------------------------------------------

json_scraper_graph = JSONScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=json_text,  # the file's content, not the file object
    config=graph_config,
)

result = json_scraper_graph.run()
print(result)
|
||||
35
examples/anthropic/json_scraper_multi_anthropic.py
Normal file
35
examples/anthropic/json_scraper_multi_anthropic.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""
|
||||
Example showing how JSONScraperMultiGraph works on multiple JSON documents
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# LLM used by the graph.
graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
}

# Read the example JSON file that lives next to this script.
FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as json_file:
    json_text = json_file.read()

# The same document is passed twice to demonstrate the multi-source input.
sources = [json_text, json_text]

multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
47
examples/anthropic/rate_limit_anthropic.py
Normal file
47
examples/anthropic/rate_limit_anthropic.py
Normal file
@ -0,0 +1,47 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper while setting an API rate limit.
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
|
||||
# required environment variables in .env
|
||||
# ANTHROPIC_API_KEY
|
||||
load_dotenv()

# ------------------------------------------------
# Graph configuration: the "rate_limit" entry asks for
# at most one LLM request per second
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
    "rate_limit": {
        "requests_per_second": 1
    },
}
graph_config = {"llm": llm_settings}

# The prompt is kept in a module-level constant so its continuation lines
# carry no extra indentation inside the string.
EVENTS_PROMPT = """Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link"""

# ------------------------------------------------
# Build the SmartScraperGraph and run it
# ------------------------------------------------

smart_scraper_graph = SmartScraperGraph(
    prompt=EVENTS_PROMPT,
    # also accepts a string with the already downloaded HTML code
    source="https://www.hmhco.com/event",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))
|
||||
52
examples/anthropic/scrape_plain_text_anthropic.py
Normal file
52
examples/anthropic/scrape_plain_text_anthropic.py
Normal file
@ -0,0 +1,52 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper from text
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Read the text file stored next to this script
# ------------------------------------------------

FILE_NAME = "inputs/plain_html_example.txt"
script_dir = os.path.dirname(os.path.realpath(__file__))

# The text could just as well come from an HTTP request instead of a file.
with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as text_file:
    page_text = text_file.read()

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Build the SmartScraperGraph on the in-memory text and run it
# ------------------------------------------------

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    source=page_text,
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))
|
||||
42
examples/anthropic/script_generator_anthropic.py
Normal file
42
examples/anthropic/script_generator_anthropic.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using ScriptCreatorGraph
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Build the ScriptCreatorGraph and run it
# ------------------------------------------------

script_creator_graph = ScriptCreatorGraph(
    prompt="List me all the projects with their description.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects",
    config=graph_config,
)

result = script_creator_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(script_creator_graph.get_execution_info()))
|
||||
|
||||
51
examples/anthropic/script_multi_generator_anthropic.py
Normal file
51
examples/anthropic/script_multi_generator_anthropic.py
Normal file
@ -0,0 +1,51 @@
|
||||
"""
|
||||
Basic example of a scraping pipeline using ScriptCreatorMultiGraph on multiple URLs
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorMultiGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {
    "llm": llm_settings,
    "library": "beautifulsoup"
}

# ------------------------------------------------
# Pages handed to the graph as its sources
# ------------------------------------------------

urls = [
    "https://schultzbergagency.com/emil-raste-karlsen/",
    "https://schultzbergagency.com/johanna-hedberg/",
]

# ------------------------------------------------
# Build the ScriptCreatorMultiGraph and run it
# ------------------------------------------------

script_creator_graph = ScriptCreatorMultiGraph(
    prompt="Find information about actors",
    source=urls,
    config=graph_config,
)

result = script_creator_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(script_creator_graph.get_execution_info()))
|
||||
43
examples/anthropic/search_graph_anthropic.py
Normal file
43
examples/anthropic/search_graph_anthropic.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Build the SearchGraph (prompt only, no explicit source) and run it
# ------------------------------------------------

search_graph = SearchGraph(
    prompt="List me Chioggia's famous dishes",
    config=graph_config,
)

result = search_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(search_graph.get_execution_info()))

# Export the result with both helpers under the name "result".
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
44
examples/anthropic/search_graph_schema_anthropic.py
Normal file
44
examples/anthropic/search_graph_schema_anthropic.py
Normal file
@ -0,0 +1,44 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
import os
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Dish(BaseModel):
    """Output schema for a single dish: its name and a description."""

    name: str = Field(description="The name of the dish")
    description: str = Field(description="The description of the dish")
|
||||
|
||||
class Dishes(BaseModel):
    """Top-level output schema: a list of ``Dish`` entries."""

    dishes: List[Dish]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Build the SearchGraph with the Dishes output schema and run it
# ------------------------------------------------

search_graph = SearchGraph(
    prompt="List me Chioggia's famous dishes",
    schema=Dishes,
    config=graph_config,
)

result = search_graph.run()
print(result)
|
||||
45
examples/anthropic/search_link_graph_anthropic.py
Normal file
45
examples/anthropic/search_link_graph_anthropic.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
from langchain_openai import AzureChatOpenAI
|
||||
from langchain_openai import AzureOpenAIEmbeddings
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Build the SearchGraph and run it
# ------------------------------------------------

search_graph = SearchGraph(
    prompt="List me the best escursions near Trento",
    config=graph_config,
)

result = search_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(search_graph.get_execution_info()))

# Export the result with both helpers under the name "result".
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
41
examples/anthropic/smart_scraper_anthropic.py
Normal file
41
examples/anthropic/smart_scraper_anthropic.py
Normal file
@ -0,0 +1,41 @@
|
||||
"""
|
||||
Basic example of a scraping pipeline using SmartScraperGraph with an Anthropic API key
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# The prompt is kept in a module-level constant so its continuation lines
# carry no extra indentation inside the string.
EVENTS_PROMPT = """Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link"""

# ------------------------------------------------
# Build the SmartScraperGraph and run it
# ------------------------------------------------

smart_scraper_graph = SmartScraperGraph(
    prompt=EVENTS_PROMPT,
    # also accepts a string with the already downloaded HTML code
    source="https://www.hmhco.com/event",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))
|
||||
32
examples/anthropic/smart_scraper_lite_anthropic.py
Normal file
32
examples/anthropic/smart_scraper_lite_anthropic.py
Normal file
@ -0,0 +1,32 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperLiteGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# Build the SmartScraperLiteGraph for a single page and run it.
smart_scraper_lite_graph = SmartScraperLiteGraph(
    prompt="Who is Marco Perini?",
    source="https://perinim.github.io/",
    config=graph_config,
)

result = smart_scraper_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info collected by the graph.
print(prettify_exec_info(smart_scraper_lite_graph.get_execution_info()))
|
||||
|
||||
56
examples/anthropic/smart_scraper_multi_anthropic.py
Normal file
56
examples/anthropic/smart_scraper_multi_anthropic.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperMultiGraph
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
# One call is enough; the original script called it twice.
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
}

# *******************************************************
# Create the SmartScraperMultiGraph instance and run it
# *******************************************************

# The graph is built and run exactly once. The original repeated this whole
# section verbatim, which scraped both pages and called the LLM a second
# time only to print the same kind of answer again.
multiple_search_graph = SmartScraperMultiGraph(
    prompt="Who is Marco Perini?",
    source=[
        "https://perinim.github.io/",
        "https://perinim.github.io/cv/"
    ],
    schema=None,
    config=graph_config
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
38
examples/anthropic/smart_scraper_multi_concat_anthropic.py
Normal file
38
examples/anthropic/smart_scraper_multi_concat_anthropic.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperMultiConcatGraph
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# -------------------------------------------------------
# Build the SmartScraperMultiConcatGraph and run it
# -------------------------------------------------------

page_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/"
]

multiple_search_graph = SmartScraperMultiConcatGraph(
    prompt="Who is Marco Perini?",
    source=page_urls,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
35
examples/anthropic/smart_scraper_multi_lite_anthropic.py
Normal file
35
examples/anthropic/smart_scraper_multi_lite_anthropic.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

page_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/"
]

# Build the SmartScraperMultiLiteGraph over both pages and run it.
smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=page_urls,
    config=graph_config,
)

result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# Print the execution info collected by the graph.
print(prettify_exec_info(smart_scraper_multi_lite_graph.get_execution_info()))
|
||||
|
||||
51
examples/anthropic/smart_scraper_schema_anthropic.py
Normal file
51
examples/anthropic/smart_scraper_schema_anthropic.py
Normal file
@ -0,0 +1,51 @@
|
||||
"""
|
||||
Basic example of a scraping pipeline using SmartScraperGraph with an output schema and an Anthropic API key
|
||||
"""
|
||||
import os
|
||||
from typing import List
|
||||
from pydantic import BaseModel, Field
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Project(BaseModel):
    """Output schema for a single project: its title and a description."""

    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")
|
||||
|
||||
class Projects(BaseModel):
    """Top-level output schema: a list of ``Project`` entries."""

    projects: List[Project]
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# Build the SmartScraperGraph with the Projects output schema and run it.
smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    schema=Projects,
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects/",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))
|
||||
55
examples/anthropic/xml_scraper_anthropic.py
Normal file
55
examples/anthropic/xml_scraper_anthropic.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperGraph from XML documents
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Read the XML document stored next to this script
# ------------------------------------------------

FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as xml_file:
    xml_text = xml_file.read()

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Build the XMLScraperGraph and run it
# ------------------------------------------------

xml_scraper_graph = XMLScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=xml_text,  # the file's content, not the file object
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Export the result with both helpers under the name "result".
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
55
examples/anthropic/xml_scraper_graph_multi_anthropic.py
Normal file
55
examples/anthropic/xml_scraper_graph_multi_anthropic.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. ANTHROPIC_API_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Read the XML document stored next to this script
# ------------------------------------------------

FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as xml_file:
    xml_text = xml_file.read()

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ------------------------------------------------
# Build the XMLScraperMultiGraph and run it
# ------------------------------------------------

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    # the same document content is passed twice to demonstrate multi-source input
    source=[xml_text, xml_text],
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Export the result with both helpers under the name "result".
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
57
examples/azure/code_generator_graph_azure.py
Normal file
57
examples/azure/code_generator_graph_azure.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using Code Generator with schema
|
||||
"""
|
||||
import os
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import CodeGeneratorGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Project(BaseModel):
    """Output schema for a single project: its title and a description."""

    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")
|
||||
|
||||
class Projects(BaseModel):
    """Top-level output schema: a list of ``Project`` entries."""

    projects: List[Project]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
# LLM settings; os.environ[...] raises KeyError when AZURE_OPENAI_KEY is unset.
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o"
}

# Iteration limits passed to the graph under "max_iterations".
iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py"
}

# ------------------------------------------------
# Build the CodeGeneratorGraph with the Projects schema and run it
# ------------------------------------------------

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = code_generator_graph.run()
print(result)
|
||||
56
examples/azure/csv_scraper_azure.py
Normal file
56
examples/azure/csv_scraper_azure.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# Load environment variables (e.g. AZURE_OPENAI_KEY) through python-dotenv.
load_dotenv()

# ------------------------------------------------
# Read the CSV file stored next to this script
# ------------------------------------------------

FILE_NAME = "inputs/username.csv"
script_dir = os.path.dirname(os.path.realpath(__file__))

csv_frame = pd.read_csv(os.path.join(script_dir, FILE_NAME))

# ------------------------------------------------
# Graph configuration
# ------------------------------------------------

# os.environ[...] raises KeyError when AZURE_OPENAI_KEY is unset.
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o"
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False
}

# ------------------------------------------------
# Build the CSVScraperGraph and run it
# ------------------------------------------------

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(csv_frame),  # the DataFrame's string form, not the file object
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the execution info collected by the graph
# ------------------------------------------------

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Export the result with both helpers under the name "result".
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
55
examples/azure/csv_scraper_graph_multi_azure.py
Normal file
55
examples/azure/csv_scraper_graph_multi_azure.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

# The bundled input file is semicolon-delimited (with a stray space after one
# delimiter in the header row). With pandas' default comma separator every row
# would collapse into a single column, so the separator is given explicitly
# and spaces following a delimiter are skipped.
text = pd.read_csv(file_path, sep=";", skipinitialspace=True)

# ************************************************
# Define the configuration for the graph
# ************************************************
graph_config = {
    "llm": {
        # Raises KeyError if AZURE_OPENAI_KEY is not set in the environment/.env
        "api_key": os.environ["AZURE_OPENAI_KEY"],
        "model": "azure_openai/gpt-4o"
    },
    "verbose": True,
    "headless": False
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    # Pass the textual content of the table (twice, to exercise the
    # multi-source graph), not the DataFrame object itself.
    source=[str(text), str(text)],
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save the result to both CSV and JSON
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
28
examples/azure/depth_search_graph_azure.py
Normal file
28
examples/azure/depth_search_graph_azure.py
Normal file
@ -0,0 +1,28 @@
|
||||
"""
|
||||
depth_search_graph_azure example
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import DepthSearchGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
"depth": 2,
|
||||
"only_inside_links": False,
|
||||
}
|
||||
|
||||
search_graph = DepthSearchGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
44
examples/azure/document_scraper_azure.py
Normal file
44
examples/azure/document_scraper_azure.py
Normal file
@ -0,0 +1,44 @@
|
||||
"""
|
||||
document_scraper example
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import DocumentScraperGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
|
||||
source = """
|
||||
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
|
||||
circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature.
|
||||
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
|
||||
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
|
||||
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
|
||||
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
|
||||
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
|
||||
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
|
||||
"""
|
||||
|
||||
pdf_scraper_graph = DocumentScraperGraph(
|
||||
prompt="Summarize the text and find the main topics",
|
||||
source=source,
|
||||
config=graph_config,
|
||||
)
|
||||
result = pdf_scraper_graph.run()
|
||||
|
||||
print(json.dumps(result, indent=4))
|
||||
120
examples/azure/inputs/books.xml
Normal file
120
examples/azure/inputs/books.xml
Normal file
@ -0,0 +1,120 @@
|
||||
<?xml version="1.0"?>
|
||||
<catalog>
|
||||
<book id="bk101">
|
||||
<author>Gambardella, Matthew</author>
|
||||
<title>XML Developer's Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>44.95</price>
|
||||
<publish_date>2000-10-01</publish_date>
|
||||
<description>An in-depth look at creating applications
|
||||
with XML.</description>
|
||||
</book>
|
||||
<book id="bk102">
|
||||
<author>Ralls, Kim</author>
|
||||
<title>Midnight Rain</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-12-16</publish_date>
|
||||
<description>A former architect battles corporate zombies,
|
||||
an evil sorceress, and her own childhood to become queen
|
||||
of the world.</description>
|
||||
</book>
|
||||
<book id="bk103">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Maeve Ascendant</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-11-17</publish_date>
|
||||
<description>After the collapse of a nanotechnology
|
||||
society in England, the young survivors lay the
|
||||
foundation for a new society.</description>
|
||||
</book>
|
||||
<book id="bk104">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Oberon's Legacy</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-03-10</publish_date>
|
||||
<description>In post-apocalypse England, the mysterious
|
||||
agent known only as Oberon helps to create a new life
|
||||
for the inhabitants of London. Sequel to Maeve
|
||||
Ascendant.</description>
|
||||
</book>
|
||||
<book id="bk105">
|
||||
<author>Corets, Eva</author>
|
||||
<title>The Sundered Grail</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-09-10</publish_date>
|
||||
<description>The two daughters of Maeve, half-sisters,
|
||||
battle one another for control of England. Sequel to
|
||||
Oberon's Legacy.</description>
|
||||
</book>
|
||||
<book id="bk106">
|
||||
<author>Randall, Cynthia</author>
|
||||
<title>Lover Birds</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-09-02</publish_date>
|
||||
<description>When Carla meets Paul at an ornithology
|
||||
conference, tempers fly as feathers get ruffled.</description>
|
||||
</book>
|
||||
<book id="bk107">
|
||||
<author>Thurman, Paula</author>
|
||||
<title>Splish Splash</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>A deep sea diver finds true love twenty
|
||||
thousand leagues beneath the sea.</description>
|
||||
</book>
|
||||
<book id="bk108">
|
||||
<author>Knorr, Stefan</author>
|
||||
<title>Creepy Crawlies</title>
|
||||
<genre>Horror</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-12-06</publish_date>
|
||||
<description>An anthology of horror stories about roaches,
|
||||
centipedes, scorpions and other insects.</description>
|
||||
</book>
|
||||
<book id="bk109">
|
||||
<author>Kress, Peter</author>
|
||||
<title>Paradox Lost</title>
|
||||
<genre>Science Fiction</genre>
|
||||
<price>6.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>After an inadvertant trip through a Heisenberg
|
||||
Uncertainty Device, James Salway discovers the problems
|
||||
of being quantum.</description>
|
||||
</book>
|
||||
<book id="bk110">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>Microsoft .NET: The Programming Bible</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-09</publish_date>
|
||||
<description>Microsoft's .NET initiative is explored in
|
||||
detail in this deep programmer's reference.</description>
|
||||
</book>
|
||||
<book id="bk111">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>MSXML3: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-01</publish_date>
|
||||
<description>The Microsoft MSXML3 parser is covered in
|
||||
detail, with attention to XML DOM interfaces, XSLT processing,
|
||||
SAX and more.</description>
|
||||
</book>
|
||||
<book id="bk112">
|
||||
<author>Galos, Mike</author>
|
||||
<title>Visual Studio 7: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>49.95</price>
|
||||
<publish_date>2001-04-16</publish_date>
|
||||
<description>Microsoft Visual Studio 7 is explored in depth,
|
||||
looking at how Visual Basic, Visual C++, C#, and ASP+ are
|
||||
integrated into a comprehensive development
|
||||
environment.</description>
|
||||
</book>
|
||||
</catalog>
|
||||
182
examples/azure/inputs/example.json
Normal file
182
examples/azure/inputs/example.json
Normal file
@ -0,0 +1,182 @@
|
||||
{
|
||||
"kind":"youtube#searchListResponse",
|
||||
"etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg",
|
||||
"nextPageToken":"CAUQAA",
|
||||
"regionCode":"NL",
|
||||
"pageInfo":{
|
||||
"totalResults":1000000,
|
||||
"resultsPerPage":5
|
||||
},
|
||||
"items":[
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"TvWDY4Mm5GM"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T14:15:01Z",
|
||||
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
|
||||
"title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts",
|
||||
"description":"",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"FC Motivate",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T14:15:01Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"aZM_42CcNZ4"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T16:09:27Z",
|
||||
"channelId":"UCM5gMM_HqfKHYIEJ3lstMUA",
|
||||
"title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 💰",
|
||||
"description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"John Nellis",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T16:09:27Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"WbBz4oh9I5VaYj91LjeJvffrBVY",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"wkP3XS3aNAY"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T16:00:50Z",
|
||||
"channelId":"UC4EP1dxFDPup_aFLt0ElsDw",
|
||||
"title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL",
|
||||
"description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"Shoot for Love",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T16:00:50Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"juxv_FhT_l4qrR05S1QTrb4CGh8",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"rJkDZ0WvfT8"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T10:00:39Z",
|
||||
"channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ",
|
||||
"title":"TOP 10 DEFENDERS 2023",
|
||||
"description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! • Instagram ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"Home of Football",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T10:00:39Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"wtuknXTmI1txoULeH3aWaOuXOow",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"XH0rtu4U6SE"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-21T16:30:05Z",
|
||||
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
|
||||
"title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts",
|
||||
"description":"",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"FC Motivate",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-21T16:30:05Z"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
7
examples/azure/inputs/username.csv
Normal file
7
examples/azure/inputs/username.csv
Normal file
@ -0,0 +1,7 @@
|
||||
Username; Identifier;First name;Last name
|
||||
booker12;9012;Rachel;Booker
|
||||
grey07;2070;Laura;Grey
|
||||
johnson81;4081;Craig;Johnson
|
||||
jenkins46;9346;Mary;Jenkins
|
||||
smith79;5079;Jamie;Smith
|
||||
|
||||
|
45
examples/azure/json_scraper_azure.py
Normal file
45
examples/azure/json_scraper_azure.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""
|
||||
Basic example of a scraping pipeline using JSONScraperGraph with an Azure OpenAI key
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
FILE_NAME = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Initialize the model instances
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
smart_scraper_graph = JSONScraperGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=text, # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
38
examples/azure/json_scraper_multi_azure.py
Normal file
38
examples/azure/json_scraper_multi_azure.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""
|
||||
Module showing how JSONScraperMultiGraph works with multiple JSON sources
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
FILE_NAME = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
sources = [text, text]
|
||||
|
||||
multiple_search_graph = JSONScraperMultiGraph(
|
||||
prompt= "List me all the authors, title and genres of the books",
|
||||
source= sources,
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
56
examples/azure/rate_limit_azure.py
Normal file
56
examples/azure/rate_limit_azure.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
|
||||
# Required environment variables in .env (this script itself reads AZURE_OPENAI_KEY):
|
||||
# AZURE_OPENAI_ENDPOINT
|
||||
# AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
|
||||
# MODEL_NAME
|
||||
# AZURE_OPENAI_API_KEY
|
||||
# OPENAI_API_TYPE
|
||||
# AZURE_OPENAI_API_VERSION
|
||||
# AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Initialize the model instances
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o",
|
||||
"rate_limit": {
|
||||
"requests_per_second": 1
|
||||
},
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="""List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
|
||||
event_end_date, event_end_time, location, event_mode, event_category,
|
||||
third_party_redirect, no_of_days,
|
||||
time_in_hours, hosted_or_attending, refreshments_type,
|
||||
registration_available, registration_link""",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://www.hmhco.com/event",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
54
examples/azure/scrape_plain_text_azure.py
Normal file
54
examples/azure/scrape_plain_text_azure.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper from text
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the text file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/plain_html_example.txt"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
# The text could also come from an HTTP request instead of a local file
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
43
examples/azure/script_generator_azure.py
Normal file
43
examples/azure/script_generator_azure.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using ScriptCreatorGraph
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
script_creator_graph = ScriptCreatorGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = script_creator_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = script_creator_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
52
examples/azure/script_multi_generator_azure.py
Normal file
52
examples/azure/script_multi_generator_azure.py
Normal file
@ -0,0 +1,52 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using ScriptCreatorMultiGraph
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorMultiGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Define the list of URLs to generate scripts for
|
||||
# ************************************************
|
||||
|
||||
urls=[
|
||||
"https://schultzbergagency.com/emil-raste-karlsen/",
|
||||
"https://schultzbergagency.com/johanna-hedberg/",
|
||||
]
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
script_creator_graph = ScriptCreatorMultiGraph(
|
||||
prompt="Find information about actors",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source=urls,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = script_creator_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = script_creator_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
51
examples/azure/search_graph_azure.py
Normal file
51
examples/azure/search_graph_azure.py
Normal file
@ -0,0 +1,51 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# NOTE: this example previously read inputs/example.json into an unused
# variable. SearchGraph below is built only from a prompt and a config, so the
# file read was dead code that could fail with FileNotFoundError for no
# benefit; it has been removed.

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        # Raises KeyError if AZURE_OPENAI_KEY is not set in the environment/.env
        "api_key": os.environ["AZURE_OPENAI_KEY"],
        "model": "azure_openai/gpt-4o"
    },
    "verbose": True,
    "headless": False
}

# ************************************************
# Create the SearchGraph instance and run it
# ************************************************

search_graph = SearchGraph(
    prompt="List me the best escursions near Trento",
    config=graph_config
)

result = search_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json and csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
60
examples/azure/search_graph_schema_azure.py
Normal file
60
examples/azure/search_graph_schema_azure.py
Normal file
@ -0,0 +1,60 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
import os
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Dish(BaseModel):
|
||||
name: str = Field(description="The name of the dish")
|
||||
description: str = Field(description="The description of the dish")
|
||||
|
||||
class Dishes(BaseModel):
|
||||
dishes: List[Dish]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SearchGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
search_graph = SearchGraph(
|
||||
prompt="List me Chioggia's famous dishes",
|
||||
config=graph_config,
|
||||
schema=Dishes
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = search_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
45
examples/azure/search_link_graph_azure.py
Normal file
45
examples/azure/search_link_graph_azure.py
Normal file
@ -0,0 +1,45 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
load_dotenv()
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SearchGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
search_graph = SearchGraph(
|
||||
prompt="List me the best escursions near Trento",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = search_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
44
examples/azure/smart_scraper_azure.py
Normal file
44
examples/azure/smart_scraper_azure.py
Normal file
@ -0,0 +1,44 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Initialize the model instances
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="""List me all the events, with the following fields:
|
||||
company_name, event_name, event_start_date, event_start_time,
|
||||
event_end_date, event_end_time, location, event_mode, event_category,
|
||||
third_party_redirect, no_of_days,
|
||||
time_in_hours, hosted_or_attending, refreshments_type,
|
||||
registration_available, registration_link""",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://www.hmhco.com/event",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
31
examples/azure/smart_scraper_lite_azure.py
Normal file
31
examples/azure/smart_scraper_lite_azure.py
Normal file
@ -0,0 +1,31 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperLiteGraph
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperLiteGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
smart_scraper_lite_graph = SmartScraperLiteGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source="https://perinim.github.io/",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_lite_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
graph_exec_info = smart_scraper_lite_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
38
examples/azure/smart_scraper_multi_azure.py
Normal file
38
examples/azure/smart_scraper_multi_azure.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperMultiGraph
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperMultiGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# *******************************************************
|
||||
# Create the SmartScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = SmartScraperMultiGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source= [
|
||||
"https://perinim.github.io/",
|
||||
"https://perinim.github.io/cv/"
|
||||
],
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
38
examples/azure/smart_scraper_multi_concat_azure.py
Normal file
38
examples/azure/smart_scraper_multi_concat_azure.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperMultiConcatGraph
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperMultiConcatGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# *******************************************************
|
||||
# Create the SmartScraperMultiConcatGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = SmartScraperMultiConcatGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source= [
|
||||
"https://perinim.github.io/",
|
||||
"https://perinim.github.io/cv/"
|
||||
],
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
35
examples/azure/smart_scraper_multi_lite_azure.py
Normal file
35
examples/azure/smart_scraper_multi_lite_azure.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperMultiLiteGraph
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source= [
|
||||
"https://perinim.github.io/",
|
||||
"https://perinim.github.io/cv/"
|
||||
],
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_multi_lite_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
49
examples/azure/smart_scraper_schema_azure.py
Normal file
49
examples/azure/smart_scraper_schema_azure.py
Normal file
@ -0,0 +1,49 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with schema
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from typing import List
|
||||
from pydantic import BaseModel, Field
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Project(BaseModel):
|
||||
title: str = Field(description="The title of the project")
|
||||
description: str = Field(description="The description of the project")
|
||||
|
||||
class Projects(BaseModel):
|
||||
projects: List[Project]
|
||||
|
||||
# ************************************************
|
||||
# Initialize the model instances
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=Projects,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
46
examples/azure/xml_scraper_azure.py
Normal file
46
examples/azure/xml_scraper_azure.py
Normal file
@ -0,0 +1,46 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperGraph with an Azure OpenAI key
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
FILE_NAME = "inputs/books.xml"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Initialize the model instances
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o"
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
smart_scraper_graph = XMLScraperGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=text, # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
56
examples/azure/xml_scraper_graph_multi_azure.py
Normal file
56
examples/azure/xml_scraper_graph_multi_azure.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the XML file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/books.xml"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
"model": "azure_openai/gpt-4o",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the XMLScraperMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
xml_scraper_graph = XMLScraperMultiGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=[text, text], # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = xml_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = xml_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
4
examples/bedrock/.env.example
Normal file
4
examples/bedrock/.env.example
Normal file
@ -0,0 +1,4 @@
|
||||
AWS_ACCESS_KEY_ID="..."
|
||||
AWS_SECRET_ACCESS_KEY="..."
|
||||
AWS_SESSION_TOKEN="..."
|
||||
AWS_DEFAULT_REGION="..."
|
||||
3
examples/bedrock/README.md
Normal file
3
examples/bedrock/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
This folder contains examples of how to use ScrapeGraphAI with [Amazon Bedrock](https://aws.amazon.com/bedrock/) ⛰️. The examples show how to extract information from websites and files using a natural language prompt.
|
||||
|
||||

|
||||
60
examples/bedrock/code_generator_graph_bedrock.py
Normal file
60
examples/bedrock/code_generator_graph_bedrock.py
Normal file
@ -0,0 +1,60 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using Code Generator with schema
|
||||
"""
|
||||
|
||||
import os, json
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import CodeGeneratorGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Project(BaseModel):
|
||||
title: str = Field(description="The title of the project")
|
||||
description: str = Field(description="The description of the project")
|
||||
|
||||
class Projects(BaseModel):
|
||||
projects: List[Project]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
"reduction": 2,
|
||||
"max_iterations": {
|
||||
"overall": 10,
|
||||
"syntax": 3,
|
||||
"execution": 3,
|
||||
"validation": 3,
|
||||
"semantic": 3
|
||||
},
|
||||
"output_file_name": "extracted_data.py"
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the CodeGeneratorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
code_generator_graph = CodeGeneratorGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=Projects,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = code_generator_graph.run()
|
||||
print(result)
|
||||
50
examples/bedrock/csv_scraper_bedrock.py
Normal file
50
examples/bedrock/csv_scraper_bedrock.py
Normal file
@ -0,0 +1,50 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from scrapegraphai.graphs import CSVScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the CSV file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/username.csv"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
text = pd.read_csv(file_path)
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
# ************************************************
|
||||
# Create the CSVScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
csv_scraper_graph = CSVScraperGraph(
|
||||
prompt="List me all the last names",
|
||||
source=str(text), # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = csv_scraper_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
56
examples/bedrock/csv_scraper_graph_multi_bedrock.py
Normal file
56
examples/bedrock/csv_scraper_graph_multi_bedrock.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
# ************************************************
|
||||
# Read the CSV file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/username.csv"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
text = pd.read_csv(file_path)
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the CSVScraperMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
csv_scraper_graph = CSVScraperMultiGraph(
|
||||
prompt="List me all the last names",
|
||||
source=[str(text), str(text)],
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = csv_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = csv_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
125
examples/bedrock/custom_graph_bedrock.py
Normal file
125
examples/bedrock/custom_graph_bedrock.py
Normal file
@ -0,0 +1,125 @@
|
||||
"""
|
||||
Example of custom graph using existing nodes
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from langchain_aws import BedrockEmbeddings
|
||||
from scrapegraphai.models import Bedrock
|
||||
from scrapegraphai.graphs import BaseGraph
|
||||
from scrapegraphai.nodes import (
|
||||
FetchNode,
|
||||
ParseNode,
|
||||
RAGNode,
|
||||
GenerateAnswerNode,
|
||||
RobotsNode
|
||||
)
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Define the graph nodes
|
||||
# ************************************************
|
||||
|
||||
llm_model = Bedrock({
|
||||
'model_id': graph_config["llm"]["model"].split("/")[-1],
|
||||
'model_kwargs': {
|
||||
'temperature': 0.0
|
||||
}})
|
||||
# graph_config above only defines an "llm" section, so indexing
# graph_config["embeddings"] raised KeyError before any node was built.
# Fall back to a default Bedrock embedding model, while still honouring an
# "embeddings" section if one is added to graph_config. As with the LLM model
# string, the "bedrock/" provider prefix is stripped to get the raw model id.
embedder = BedrockEmbeddings(
    model_id=graph_config.get("embeddings", {})
    .get("model", "bedrock/cohere.embed-multilingual-v3")
    .split("/")[-1]
)
|
||||
|
||||
# Define the nodes for the graph
|
||||
robot_node = RobotsNode(
|
||||
input="url",
|
||||
output=["is_scrapable"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"force_scraping": True,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
fetch_node = FetchNode(
|
||||
input="url | local_dir",
|
||||
output=["doc"],
|
||||
node_config={
|
||||
"verbose": True,
|
||||
"headless": True,
|
||||
}
|
||||
)
|
||||
|
||||
parse_node = ParseNode(
|
||||
input="doc",
|
||||
output=["parsed_doc"],
|
||||
node_config={
|
||||
"chunk_size": 4096,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
rag_node = RAGNode(
|
||||
input="user_prompt & (parsed_doc | doc)",
|
||||
output=["relevant_chunks"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"embedder_model": embedder,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
generate_answer_node = GenerateAnswerNode(
|
||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||
output=["answer"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Create the graph by defining the connections
|
||||
# ************************************************
|
||||
|
||||
graph = BaseGraph(
|
||||
nodes=[
|
||||
robot_node,
|
||||
fetch_node,
|
||||
parse_node,
|
||||
rag_node,
|
||||
generate_answer_node,
|
||||
],
|
||||
edges=[
|
||||
(robot_node, fetch_node),
|
||||
(fetch_node, parse_node),
|
||||
(parse_node, rag_node),
|
||||
(rag_node, generate_answer_node)
|
||||
],
|
||||
entry_point=robot_node
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Execute the graph
|
||||
# ************************************************
|
||||
|
||||
result, execution_info = graph.execute({
|
||||
"user_prompt": "List me all the articles",
|
||||
"url": "https://perinim.github.io/projects"
|
||||
})
|
||||
|
||||
# Get the answer from the result
|
||||
result = result.get("answer", "No answer found.")
|
||||
print(json.dumps(result, indent=4))
|
||||
25
examples/bedrock/depth_search_graph_bedrock.py
Normal file
25
examples/bedrock/depth_search_graph_bedrock.py
Normal file
@ -0,0 +1,25 @@
|
||||
"""
|
||||
depth_search_graph_bedrock example
|
||||
"""
|
||||
from scrapegraphai.graphs import DepthSearchGraph
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
"depth": 2,
|
||||
"only_inside_links": False,
|
||||
}
|
||||
|
||||
search_graph = DepthSearchGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
42
examples/bedrock/document_scraper_bedrock.py
Normal file
42
examples/bedrock/document_scraper_bedrock.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""
|
||||
document_scraper example
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import DocumentScraperGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
source = """
|
||||
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
|
||||
circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature.
|
||||
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
|
||||
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
|
||||
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
|
||||
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
|
||||
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
|
||||
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
|
||||
"""
|
||||
|
||||
pdf_scraper_graph = DocumentScraperGraph(
|
||||
prompt="Summarize the text and find the main topics",
|
||||
source=source,
|
||||
config=graph_config,
|
||||
)
|
||||
result = pdf_scraper_graph.run()
|
||||
|
||||
print(json.dumps(result, indent=4))
|
||||
120
examples/bedrock/inputs/books.xml
Normal file
120
examples/bedrock/inputs/books.xml
Normal file
@ -0,0 +1,120 @@
|
||||
<?xml version="1.0"?>
|
||||
<catalog>
|
||||
<book id="bk101">
|
||||
<author>Gambardella, Matthew</author>
|
||||
<title>XML Developer's Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>44.95</price>
|
||||
<publish_date>2000-10-01</publish_date>
|
||||
<description>An in-depth look at creating applications
|
||||
with XML.</description>
|
||||
</book>
|
||||
<book id="bk102">
|
||||
<author>Ralls, Kim</author>
|
||||
<title>Midnight Rain</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-12-16</publish_date>
|
||||
<description>A former architect battles corporate zombies,
|
||||
an evil sorceress, and her own childhood to become queen
|
||||
of the world.</description>
|
||||
</book>
|
||||
<book id="bk103">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Maeve Ascendant</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-11-17</publish_date>
|
||||
<description>After the collapse of a nanotechnology
|
||||
society in England, the young survivors lay the
|
||||
foundation for a new society.</description>
|
||||
</book>
|
||||
<book id="bk104">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Oberon's Legacy</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-03-10</publish_date>
|
||||
<description>In post-apocalypse England, the mysterious
|
||||
agent known only as Oberon helps to create a new life
|
||||
for the inhabitants of London. Sequel to Maeve
|
||||
Ascendant.</description>
|
||||
</book>
|
||||
<book id="bk105">
|
||||
<author>Corets, Eva</author>
|
||||
<title>The Sundered Grail</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-09-10</publish_date>
|
||||
<description>The two daughters of Maeve, half-sisters,
|
||||
battle one another for control of England. Sequel to
|
||||
Oberon's Legacy.</description>
|
||||
</book>
|
||||
<book id="bk106">
|
||||
<author>Randall, Cynthia</author>
|
||||
<title>Lover Birds</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-09-02</publish_date>
|
||||
<description>When Carla meets Paul at an ornithology
|
||||
conference, tempers fly as feathers get ruffled.</description>
|
||||
</book>
|
||||
<book id="bk107">
|
||||
<author>Thurman, Paula</author>
|
||||
<title>Splish Splash</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>A deep sea diver finds true love twenty
|
||||
thousand leagues beneath the sea.</description>
|
||||
</book>
|
||||
<book id="bk108">
|
||||
<author>Knorr, Stefan</author>
|
||||
<title>Creepy Crawlies</title>
|
||||
<genre>Horror</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-12-06</publish_date>
|
||||
<description>An anthology of horror stories about roaches,
|
||||
centipedes, scorpions and other insects.</description>
|
||||
</book>
|
||||
<book id="bk109">
|
||||
<author>Kress, Peter</author>
|
||||
<title>Paradox Lost</title>
|
||||
<genre>Science Fiction</genre>
|
||||
<price>6.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>After an inadvertant trip through a Heisenberg
|
||||
Uncertainty Device, James Salway discovers the problems
|
||||
of being quantum.</description>
|
||||
</book>
|
||||
<book id="bk110">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>Microsoft .NET: The Programming Bible</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-09</publish_date>
|
||||
<description>Microsoft's .NET initiative is explored in
|
||||
detail in this deep programmer's reference.</description>
|
||||
</book>
|
||||
<book id="bk111">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>MSXML3: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-01</publish_date>
|
||||
<description>The Microsoft MSXML3 parser is covered in
|
||||
detail, with attention to XML DOM interfaces, XSLT processing,
|
||||
SAX and more.</description>
|
||||
</book>
|
||||
<book id="bk112">
|
||||
<author>Galos, Mike</author>
|
||||
<title>Visual Studio 7: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>49.95</price>
|
||||
<publish_date>2001-04-16</publish_date>
|
||||
<description>Microsoft Visual Studio 7 is explored in depth,
|
||||
looking at how Visual Basic, Visual C++, C#, and ASP+ are
|
||||
integrated into a comprehensive development
|
||||
environment.</description>
|
||||
</book>
|
||||
</catalog>
|
||||
38
examples/bedrock/inputs/example.json
Normal file
38
examples/bedrock/inputs/example.json
Normal file
@ -0,0 +1,38 @@
|
||||
{
|
||||
"quiz": {
|
||||
"sport": {
|
||||
"q1": {
|
||||
"question": "Which one is correct team name in NBA?",
|
||||
"options": [
|
||||
"New York Bulls",
|
||||
"Los Angeles Kings",
|
||||
"Golden State Warriros",
|
||||
"Huston Rocket"
|
||||
],
|
||||
"answer": "Huston Rocket"
|
||||
}
|
||||
},
|
||||
"maths": {
|
||||
"q1": {
|
||||
"question": "5 + 7 = ?",
|
||||
"options": [
|
||||
"10",
|
||||
"11",
|
||||
"12",
|
||||
"13"
|
||||
],
|
||||
"answer": "12"
|
||||
},
|
||||
"q2": {
|
||||
"question": "12 - 8 = ?",
|
||||
"options": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4"
|
||||
],
|
||||
"answer": "4"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
105
examples/bedrock/inputs/plain_html_example.txt
Normal file
105
examples/bedrock/inputs/plain_html_example.txt
Normal file
@ -0,0 +1,105 @@
|
||||
<body class="fixed-top-nav " style="padding-top: 57px;">
|
||||
<header>
|
||||
<nav id="navbar" class="navbar navbar-light navbar-expand-sm fixed-top">
|
||||
<div class="container">
|
||||
<a class="navbar-brand title font-weight-lighter" href="/"><span class="font-weight-bold">Marco </span>Perini</a> <button class="navbar-toggler collapsed ml-auto" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar top-bar"></span> <span class="icon-bar middle-bar"></span> <span class="icon-bar bottom-bar"></span> </button>
|
||||
<div class="collapse navbar-collapse text-right" id="navbarNav">
|
||||
<ul class="navbar-nav ml-auto flex-nowrap">
|
||||
<li class="nav-item "> <a class="nav-link" href="/">About</a> </li>
|
||||
<li class="nav-item dropdown active">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Projects<span class="sr-only">(current)</span></a>
|
||||
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="/projects/">Projects</a>
|
||||
<div class="dropdown-divider"></div>
|
||||
<a class="dropdown-item" href="/competitions/">Competitions</a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="nav-item "> <a class="nav-link" href="/cv/">CV</a> </li>
|
||||
<li class="toggle-container"> <button id="light-toggle" title="Change theme"> <i class="fa-solid fa-moon"></i> <i class="fa-solid fa-sun"></i> </button> </li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<progress id="progress" value="0" max="284" style="top: 57px;">
|
||||
<div class="progress-container"> <span class="progress-bar"></span> </div>
|
||||
</progress>
|
||||
</header>
|
||||
<div class="container mt-5">
|
||||
<div class="post">
|
||||
<header class="post-header">
|
||||
<h1 class="post-title">Projects</h1>
|
||||
<p class="post-description"></p>
|
||||
</header>
|
||||
<article>
|
||||
<div class="projects">
|
||||
<div class="grid" style="position: relative; height: 861.992px;">
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 0px;">
|
||||
<a href="/projects/rotary-pendulum-rl/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/rotary_pybullet.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Rotary Pendulum RL</h4>
|
||||
<p class="card-text">Open Source project aimed at controlling a real life rotary pendulum using RL algorithms</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 0px;">
|
||||
<a href="https://github.com/PeriniM/DQN-SwingUp" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/value-policy-heatmaps.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">DQN Implementation from scratch</h4>
|
||||
<p class="card-text">Developed a Deep Q-Network algorithm to train a simple and double pendulum</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 447.414px;">
|
||||
<a href="https://github.com/PeriniM/Multi-Agents-HAED" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/multi_agents_haed.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Multi Agents HAED</h4>
|
||||
<p class="card-text">University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 370.172px;">
|
||||
<a href="/projects/wireless-esc-drone/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/wireless_esc.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Wireless ESC for Modular Drones</h4>
|
||||
<p class="card-text">Modular drone architecture proposal and proof of concept. The project received maximum grade.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
<footer class="fixed-bottom">
|
||||
<div class="container mt-0"> © Copyright 2023 Marco Perini. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div>
|
||||
</footer>
|
||||
<div class="hiddendiv common"></div>
|
||||
</body>
|
||||
6
examples/bedrock/inputs/username.csv
Normal file
6
examples/bedrock/inputs/username.csv
Normal file
@ -0,0 +1,6 @@
|
||||
Username; Identifier;First name;Last name
|
||||
booker12;9012;Rachel;Booker
|
||||
grey07;2070;Laura;Grey
|
||||
johnson81;4081;Craig;Johnson
|
||||
jenkins46;9346;Mary;Jenkins
|
||||
smith79;5079;Jamie;Smith
|
||||
|
57
examples/bedrock/json_scraper_bedrock.py
Normal file
57
examples/bedrock/json_scraper_bedrock.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using JSONScraperGraph from JSON documents
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the JSON file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the JSONScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
json_scraper_graph = JSONScraperGraph(
|
||||
prompt="List me all questions and options in the math section, no answers.",
|
||||
source=text, # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = json_scraper_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = json_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to both json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
32
examples/bedrock/json_scraper_multi_bedrock.py
Normal file
32
examples/bedrock/json_scraper_multi_bedrock.py
Normal file
@ -0,0 +1,32 @@
|
||||
"""
|
||||
Example showing how JSONScraperMultiGraph works on multiple JSON documents
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
FILE_NAME = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
sources = [text, text]
|
||||
|
||||
# Both entries in `sources` are copies of inputs/example.json, which holds a
# quiz (sport / maths questions), so the prompt asks about quiz content rather
# than about books.
multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all questions and options in the math section, no answers.",
    source=sources,
    schema=None,
    config=graph_config
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
44
examples/bedrock/rate_limit_bedrock.py
Normal file
44
examples/bedrock/rate_limit_bedrock.py
Normal file
@ -0,0 +1,44 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with a custom rate limit
|
||||
"""
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0,
|
||||
"rate_limit": {
|
||||
"requests_per_second": 1
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
54
examples/bedrock/scrape_plain_text_bedrock.py
Normal file
54
examples/bedrock/scrape_plain_text_bedrock.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper from text
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the text file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/plain_html_example.txt"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
# The text could also come from an HTTP request (e.g. made with the requests module)
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
BIN
examples/bedrock/scrapegraphai_bedrock.png
Normal file
BIN
examples/bedrock/scrapegraphai_bedrock.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 80 KiB |
43
examples/bedrock/script_generator_bedrock.py
Normal file
43
examples/bedrock/script_generator_bedrock.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using ScriptCreatorGraph
|
||||
"""
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
},
|
||||
"library": "beautifulsoup"
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
script_creator_graph = ScriptCreatorGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = script_creator_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = script_creator_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
48
examples/bedrock/script_multi_generator_bedrock.py
Normal file
48
examples/bedrock/script_multi_generator_bedrock.py
Normal file
@ -0,0 +1,48 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using ScriptCreatorMultiGraph
|
||||
"""
|
||||
from scrapegraphai.graphs import ScriptCreatorMultiGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
},
|
||||
"library": "beautifulsoup"
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Define the list of URLs to scrape
|
||||
# ************************************************
|
||||
|
||||
urls=[
|
||||
"https://schultzbergagency.com/emil-raste-karlsen/",
|
||||
"https://schultzbergagency.com/johanna-hedberg/",
|
||||
]
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
script_creator_graph = ScriptCreatorMultiGraph(
|
||||
prompt="Find information about actors",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source=urls,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = script_creator_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = script_creator_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
27
examples/bedrock/search_graph_bedrock.py
Normal file
27
examples/bedrock/search_graph_bedrock.py
Normal file
@ -0,0 +1,27 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
# ************************************************
|
||||
# Create the SearchGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
search_graph = SearchGraph(
|
||||
prompt="List me Chioggia's famous dishes",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
54
examples/bedrock/search_graph_schema_bedrock.py
Normal file
54
examples/bedrock/search_graph_schema_bedrock.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""
|
||||
Example of Search Graph with a custom output schema
|
||||
"""
|
||||
from typing import List
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Dish(BaseModel):
|
||||
name: str = Field(description="The name of the dish")
|
||||
description: str = Field(description="The description of the dish")
|
||||
|
||||
class Dishes(BaseModel):
|
||||
dishes: List[Dish]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SearchGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
search_graph = SearchGraph(
|
||||
prompt="List me Chioggia's famous dishes",
|
||||
config=graph_config,
|
||||
schema=Dishes
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = search_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
40
examples/bedrock/search_link_graph_bedrock.py
Normal file
40
examples/bedrock/search_link_graph_bedrock.py
Normal file
@ -0,0 +1,40 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SearchGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
# Instantiate SearchGraph with the query prompt and the LLM configuration
# defined above. The prompt is passed on verbatim, so it must be spelled
# correctly ("excursions", not "escursions").
search_graph = SearchGraph(
    prompt="List me the best excursions near Trento",
    config=graph_config
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = search_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
42
examples/bedrock/smart_scraper_bedrock.py
Normal file
42
examples/bedrock/smart_scraper_bedrock.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
26
examples/bedrock/smart_scraper_lite_bedrock.py
Normal file
26
examples/bedrock/smart_scraper_lite_bedrock.py
Normal file
@ -0,0 +1,26 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperLiteGraph
|
||||
"""
|
||||
import json
|
||||
from scrapegraphai.graphs import SmartScraperLiteGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
smart_scraper_lite_graph = SmartScraperLiteGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source="https://perinim.github.io/",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_lite_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
graph_exec_info = smart_scraper_lite_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
34
examples/bedrock/smart_scraper_multi_bedrock.py
Normal file
34
examples/bedrock/smart_scraper_multi_bedrock.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperMultiGraph
|
||||
"""
|
||||
import json
|
||||
from scrapegraphai.graphs import SmartScraperMultiGraph
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# *******************************************************
|
||||
# Create the SmartScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = SmartScraperMultiGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source= [
|
||||
"https://perinim.github.io/",
|
||||
"https://perinim.github.io/cv/"
|
||||
],
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
35
examples/bedrock/smart_scraper_multi_concat_bedrock.py
Normal file
35
examples/bedrock/smart_scraper_multi_concat_bedrock.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperMultiConcatGraph
|
||||
"""
|
||||
import json
|
||||
from scrapegraphai.graphs import SmartScraperMultiConcatGraph
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
# *******************************************************
|
||||
# Create the SmartScraperMultiConcatGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = SmartScraperMultiConcatGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source= [
|
||||
"https://perinim.github.io/",
|
||||
"https://perinim.github.io/cv/"
|
||||
],
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
29
examples/bedrock/smart_scraper_multi_lite_bedrock.py
Normal file
29
examples/bedrock/smart_scraper_multi_lite_bedrock.py
Normal file
@ -0,0 +1,29 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperMultiLiteGraph
|
||||
"""
|
||||
import json
|
||||
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source= [
|
||||
"https://perinim.github.io/",
|
||||
"https://perinim.github.io/cv/"
|
||||
],
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_multi_lite_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
52
examples/bedrock/smart_scraper_schema_bedrock.py
Normal file
52
examples/bedrock/smart_scraper_schema_bedrock.py
Normal file
@ -0,0 +1,52 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with a custom output schema
|
||||
"""
|
||||
from typing import List
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Project(BaseModel):
|
||||
title: str = Field(description="The title of the project")
|
||||
description: str = Field(description="The description of the project")
|
||||
|
||||
class Projects(BaseModel):
|
||||
projects: List[Project]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=Projects,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
58
examples/bedrock/xml_scraper_bedrock.py
Normal file
58
examples/bedrock/xml_scraper_bedrock.py
Normal file
@ -0,0 +1,58 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperGraph from XML documents
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the XML file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/books.xml"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the XMLScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
xml_scraper_graph = XMLScraperGraph(
|
||||
prompt="List me all the authors, title and genres of the books. Skip the preamble.",
|
||||
source=text, # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = xml_scraper_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = xml_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to both json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
56
examples/bedrock/xml_scraper_graph_multi_bedrock.py
Normal file
56
examples/bedrock/xml_scraper_graph_multi_bedrock.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the XML file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/books.xml"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": "client_name",
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the XMLScraperMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
xml_scraper_graph = XMLScraperMultiGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=[text, text], # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = xml_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = xml_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
1
examples/benchmarks/GenerateScraper/.env.example
Normal file
1
examples/benchmarks/GenerateScraper/.env.example
Normal file
@ -0,0 +1 @@
|
||||
OPENAI_APIKEY="your openai key here"
|
||||
43
examples/benchmarks/GenerateScraper/Readme.md
Normal file
43
examples/benchmarks/GenerateScraper/Readme.md
Normal file
@ -0,0 +1,43 @@
|
||||
# Local models
|
||||
# Local models
|
||||
The two benchmark websites are:
|
||||
- Example 1: https://perinim.github.io/projects
|
||||
- Example 2: https://www.wired.com (at 17/4/2024)
|
||||
|
||||
Both are stored locally as .txt files, so the benchmark does not depend on the internet connection.
|
||||
|
||||
The time is measured in seconds
|
||||
|
||||
The model run for this benchmark is Mistral on Ollama with nomic-embed-text
|
||||
|
||||
| Hardware | Model | Example 1 | Example 2 |
|
||||
| ---------------------- | --------------------------------------- | --------- | --------- |
|
||||
| Macbook 14' m1 pro | Mistral on Ollama with nomic-embed-text | 30.54s | 35.76s |
|
||||
| Macbook m2 max | Mistral on Ollama with nomic-embed-text | | |
|
||||
| Macbook 14' m1 pro<br> | Llama3 on Ollama with nomic-embed-text | 27.82s | 29.986s |
|
||||
| Macbook m2 max<br> | Llama3 on Ollama with nomic-embed-text | | |
|
||||
|
||||
|
||||
**Note**: the Docker examples were not run on devices other than the MacBook because the performance is too slow (10 times slower than Ollama).
|
||||
# Performance on API services
|
||||
### Example 1: personal portfolio
|
||||
**URL**: https://perinim.github.io/projects
|
||||
**Task**: List me all the projects with their description.
|
||||
|
||||
| Name | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
|
||||
| --------------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
|
||||
| gpt-3.5-turbo | 24.21 | 1892 | 1802 | 90 | 1 | 0.002883 |
|
||||
| gpt-4-turbo-preview | 6.614 | 1936 | 1802 | 134 | 1 | 0.02204 |
|
||||
| Groq with nomic-embed-text | 6.71 | 2201 | 2024 | 177 | 1 | 0 |
|
||||
|
||||
### Example 2: Wired
|
||||
**URL**: https://www.wired.com
|
||||
**Task**: List me all the articles with their description.
|
||||
|
||||
| Name | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
|
||||
| --------------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
|
||||
| gpt-3.5-turbo | | | | | | |
|
||||
| gpt-4-turbo-preview | | | | | | |
|
||||
| Groq with nomic-embed-text | | | | | | |
|
||||
|
||||
|
||||
61
examples/benchmarks/GenerateScraper/benchmark_groq.py
Normal file
61
examples/benchmarks/GenerateScraper/benchmark_groq.py
Normal file
@ -0,0 +1,61 @@
|
||||
"""
|
||||
Basic example of script generation pipeline using ScriptCreatorGraph from text
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the text file
|
||||
# ************************************************
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
groq_key = os.getenv("GROQ_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "groq/gemma-7b-it",
|
||||
"api_key": groq_key,
|
||||
"temperature": 0
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
|
||||
},
|
||||
"headless": False,
|
||||
"library": "beautifoulsoup"
|
||||
}
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = ScriptCreatorGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
57
examples/benchmarks/GenerateScraper/benchmark_llama3.py
Normal file
57
examples/benchmarks/GenerateScraper/benchmark_llama3.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of script generation pipeline using ScriptCreatorGraph from text
|
||||
"""
|
||||
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# ************************************************
|
||||
# Read the text file
|
||||
# ************************************************
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/llama3",
|
||||
"temperature": 0,
|
||||
# "model_tokens": 2000, # set context length arbitrarily,
|
||||
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
|
||||
},
|
||||
"library": "beautifoulsoup"
|
||||
}
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = ScriptCreatorGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
62
examples/benchmarks/GenerateScraper/benchmark_mistral.py
Normal file
62
examples/benchmarks/GenerateScraper/benchmark_mistral.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""
|
||||
Basic example of script generation pipeline using ScriptCreatorGraph from text
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the text file
|
||||
# ************************************************
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
openai_key = os.getenv("GPT4_KEY")
|
||||
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/mistral",
|
||||
"temperature": 0,
|
||||
# "model_tokens": 2000, # set context length arbitrarily,
|
||||
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
|
||||
},
|
||||
"library": "beautifoulsoup"
|
||||
}
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = ScriptCreatorGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
@ -0,0 +1,53 @@
|
||||
"""
|
||||
Basic example of script generation pipeline using ScriptCreatorGraph from text
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the text file
|
||||
# ************************************************
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": openai_key,
|
||||
"model": "openai/gpt-3.5-turbo",
|
||||
},
|
||||
"library": "beautifoulsoup"
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = ScriptCreatorGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
53
examples/benchmarks/GenerateScraper/benchmark_openai_gpt4.py
Normal file
53
examples/benchmarks/GenerateScraper/benchmark_openai_gpt4.py
Normal file
@ -0,0 +1,53 @@
|
||||
"""
|
||||
Basic example of script generation pipeline using ScriptCreatorGraph from text
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the text file
|
||||
# ************************************************
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": openai_key,
|
||||
"model": "openai/gpt-4-turbo-2024-04-09",
|
||||
},
|
||||
"library": "beautifoulsoup"
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = ScriptCreatorGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
105
examples/benchmarks/GenerateScraper/inputs/example_1.txt
Normal file
105
examples/benchmarks/GenerateScraper/inputs/example_1.txt
Normal file
@ -0,0 +1,105 @@
|
||||
<body class="fixed-top-nav " style="padding-top: 57px;">
|
||||
<header>
|
||||
<nav id="navbar" class="navbar navbar-light navbar-expand-sm fixed-top">
|
||||
<div class="container">
|
||||
<a class="navbar-brand title font-weight-lighter" href="/"><span class="font-weight-bold">Marco </span>Perini</a> <button class="navbar-toggler collapsed ml-auto" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar top-bar"></span> <span class="icon-bar middle-bar"></span> <span class="icon-bar bottom-bar"></span> </button>
|
||||
<div class="collapse navbar-collapse text-right" id="navbarNav">
|
||||
<ul class="navbar-nav ml-auto flex-nowrap">
|
||||
<li class="nav-item "> <a class="nav-link" href="/">About</a> </li>
|
||||
<li class="nav-item dropdown active">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Projects<span class="sr-only">(current)</span></a>
|
||||
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="/projects/">Projects</a>
|
||||
<div class="dropdown-divider"></div>
|
||||
<a class="dropdown-item" href="/competitions/">Competitions</a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="nav-item "> <a class="nav-link" href="/cv/">CV</a> </li>
|
||||
<li class="toggle-container"> <button id="light-toggle" title="Change theme"> <i class="fa-solid fa-moon"></i> <i class="fa-solid fa-sun"></i> </button> </li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<progress id="progress" value="0" max="284" style="top: 57px;">
|
||||
<div class="progress-container"> <span class="progress-bar"></span> </div>
|
||||
</progress>
|
||||
</header>
|
||||
<div class="container mt-5">
|
||||
<div class="post">
|
||||
<header class="post-header">
|
||||
<h1 class="post-title">Projects</h1>
|
||||
<p class="post-description"></p>
|
||||
</header>
|
||||
<article>
|
||||
<div class="projects">
|
||||
<div class="grid" style="position: relative; height: 861.992px;">
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 0px;">
|
||||
<a href="/projects/rotary-pendulum-rl/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/rotary_pybullet.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Rotary Pendulum RL</h4>
|
||||
<p class="card-text">Open Source project aimed at controlling a real life rotary pendulum using RL algorithms</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 0px;">
|
||||
<a href="https://github.com/PeriniM/DQN-SwingUp" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/value-policy-heatmaps.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">DQN Implementation from scratch</h4>
|
||||
<p class="card-text">Developed a Deep Q-Network algorithm to train a simple and double pendulum</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 447.414px;">
|
||||
<a href="https://github.com/PeriniM/Multi-Agents-HAED" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/multi_agents_haed.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Multi Agents HAED</h4>
|
||||
<p class="card-text">University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 370.172px;">
|
||||
<a href="/projects/wireless-esc-drone/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/wireless_esc.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Wireless ESC for Modular Drones</h4>
|
||||
<p class="card-text">Modular drone architecture proposal and proof of concept. The project received maximum grade.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
<footer class="fixed-bottom">
|
||||
<div class="container mt-0"> © Copyright 2023 Marco Perini. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div>
|
||||
</footer>
|
||||
<div class="hiddendiv common"></div>
|
||||
</body>
|
||||
400
examples/benchmarks/GenerateScraper/inputs/example_2.txt
Normal file
400
examples/benchmarks/GenerateScraper/inputs/example_2.txt
Normal file
File diff suppressed because one or more lines are too long
1
examples/benchmarks/SmartScraper/.env.example
Normal file
1
examples/benchmarks/SmartScraper/.env.example
Normal file
@ -0,0 +1 @@
|
||||
OPENAI_APIKEY="your openai key here"
|
||||
42
examples/benchmarks/SmartScraper/Readme.md
Normal file
42
examples/benchmarks/SmartScraper/Readme.md
Normal file
@ -0,0 +1,42 @@
|
||||
# Local models
|
||||
# Local models
|
||||
The two benchmark websites are:
|
||||
- Example 1: https://perinim.github.io/projects
|
||||
- Example 2: https://www.wired.com (at 17/4/2024)
|
||||
|
||||
Both are stored locally as .txt files, so the benchmark does not depend on the internet connection.
|
||||
|
||||
| Hardware | Model | Example 1 | Example 2 |
|
||||
| ---------------------- | --------------------------------------- | --------- | --------- |
|
||||
| Macbook 14' m1 pro | Mistral on Ollama with nomic-embed-text | 16.291s | 38.74s |
|
||||
| Macbook m2 max | Mistral on Ollama with nomic-embed-text | | |
|
||||
| Macbook 14' m1 pro<br> | Llama3 on Ollama with nomic-embed-text | 12.88s | 13.84s |
|
||||
| Macbook m2 max<br> | Llama3 on Ollama with nomic-embed-text | | |
|
||||
|
||||
**Note**: the Docker examples were not run on devices other than the MacBook because the performance is too slow (10 times slower than Ollama). The results are the following:
|
||||
|
||||
| Hardware | Example 1 | Example 2 |
|
||||
| ------------------ | --------- | --------- |
|
||||
| Macbook 14' m1 pro | 139.89 | Too long |
|
||||
# Performance on API services
|
||||
### Example 1: personal portfolio
|
||||
**URL**: https://perinim.github.io/projects
|
||||
**Task**: List me all the projects with their description.
|
||||
|
||||
| Name | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
|
||||
| ------------------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
|
||||
| gpt-3.5-turbo | 4.132s | 438 | 303 | 135 | 1 | 0.000724 |
|
||||
| gpt-4-turbo-preview | 6.965s | 442 | 303 | 139 | 1 | 0.0072 |
|
||||
| gpt-4-o | 4.446s | 444 | 305 | 139 | 1 | 0 |
|
||||
| Groq with nomic-embed-text<br> | 1.335s | 648 | 482 | 166 | 1 | 0 |
|
||||
|
||||
### Example 2: Wired
|
||||
**URL**: https://www.wired.com
|
||||
**Task**: List me all the articles with their description.
|
||||
|
||||
| Name | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
|
||||
| ------------------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
|
||||
| gpt-3.5-turbo | 8.836s | 1167 | 726 | 441 | 1 | 0.001971 |
|
||||
| gpt-4-turbo-preview | 21.53s | 1205 | 726 | 479 | 1 | 0.02163 |
|
||||
| gpt-4-o | 15.27s | 1400 | 715 | 685 | 1 | 0 |
|
||||
| Groq with nomic-embed-text<br> | 3.82s | 2459 | 2192 | 267 | 1 | 0 |
|
||||
51
examples/benchmarks/SmartScraper/benchmark_docker.py
Normal file
51
examples/benchmarks/SmartScraper/benchmark_docker.py
Normal file
@ -0,0 +1,51 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper from text
|
||||
"""
|
||||
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/mistral",
|
||||
"temperature": 0,
|
||||
"format": "json", # Ollama needs the format to be specified explicitly
|
||||
# "model_tokens": 2000, # set context length arbitrarily
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
57
examples/benchmarks/SmartScraper/benchmark_groq.py
Normal file
57
examples/benchmarks/SmartScraper/benchmark_groq.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper from text
|
||||
"""
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
groq_key = os.getenv("GROQ_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "groq/gemma-7b-it",
|
||||
"api_key": groq_key,
|
||||
"temperature": 0
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
|
||||
},
|
||||
"headless": False
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
53
examples/benchmarks/SmartScraper/benchmark_llama3.py
Normal file
53
examples/benchmarks/SmartScraper/benchmark_llama3.py
Normal file
@ -0,0 +1,53 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper from text
|
||||
"""
|
||||
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/llama3",
|
||||
"temperature": 0,
|
||||
"format": "json", # Ollama needs the format to be specified explicitly
|
||||
# "model_tokens": 2000, # set context length arbitrarily
|
||||
"base_url": "http://localhost:11434",
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434",
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
54
examples/benchmarks/SmartScraper/benchmark_mistral.py
Normal file
54
examples/benchmarks/SmartScraper/benchmark_mistral.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper from text
|
||||
"""
|
||||
|
||||
import os
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
|
||||
tasks = ["List me all the projects with their description.",
|
||||
"List me all the articles with their description."]
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/mistral",
|
||||
"temperature": 0,
|
||||
"format": "json", # Ollama needs the format to be specified explicitly
|
||||
# "model_tokens": 2000, # set context length arbitrarily
|
||||
"base_url": "http://localhost:11434",
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434",
|
||||
}
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
for i in range(0, 2):
|
||||
with open(files[i], 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt=tasks[i],
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user