fix: revert

This commit is contained in:
Marco Vinciguerra 2025-01-02 14:49:46 +01:00
parent bb5de581c0
commit b312251cc5
523 changed files with 27946 additions and 75 deletions

View File

@ -24,21 +24,9 @@ Just say which information you want to extract and the library will do it for yo
<img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/sgai-hero.png" alt="ScrapeGraphAI Hero" style="width: 100%;">
</p>
## 🔗 ScrapeGraph API & SDKs
If you are looking for a quick solution to integrate ScrapeGraph in your system, check out our powerful API [here!](https://dashboard.scrapegraphai.com/login)
## News 📰
<p align="center">
<img src="https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/api-banner.png" alt="ScrapeGraph API Banner" style="width: 100%;">
</p>
We offer SDKs in both Python and Node.js, making it easy to integrate into your projects. Check them out below:
| SDK | Language | GitHub Link |
|-----------|----------|-----------------------------------------------------------------------------|
| Python SDK | Python | [scrapegraph-py](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-py) |
| Node.js SDK | Node.js | [scrapegraph-js](https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js) |
The Official API Documentation can be found [here](https://docs.scrapegraphai.com/).
- ScrapeGraphAI now has its own API! Check it out [here](https://scrapegraphai.com)!
## 🚀 Quick install
@ -99,8 +87,8 @@ graph_config = {
# Create the SmartScraperGraph instance
smart_scraper_graph = SmartScraperGraph(
prompt="Extract me all the news from the website",
source="https://www.wired.com",
prompt="Find some information about what does the company do, the name and a contact email.",
source="https://scrapegraphai.com/",
config=graph_config
)
@ -112,20 +100,10 @@ print(json.dumps(result, indent=4))
The output will be a dictionary like the following:
```python
"result": {
"news": [
{
"title": "The New Jersey Drone Mystery May Not Actually Be That Mysterious",
"link": "https://www.wired.com/story/new-jersey-drone-mystery-maybe-not-drones/",
"author": "Lily Hay Newman"
},
{
"title": "Former ByteDance Intern Accused of Sabotage Among Winners of Prestigious AI Award",
"link": "https://www.wired.com/story/bytedance-intern-best-paper-neurips/",
"author": "Louise Matsakis"
},
...
]
{
"company": "ScrapeGraphAI",
"name": "ScrapeGraphAI Extracting content from websites and local documents using LLM",
"contact_email": "contact@scrapegraphai.com"
}
```
There are other pipelines that can be used to extract information from multiple pages, generate Python scripts, or even generate audio files.
@ -157,7 +135,8 @@ Try it directly on the web using Google Colab:
## 📖 Documentation
The documentation for ScrapeGraphAI can be found [here](https://scrapegraph-ai.readthedocs.io/en/latest/).
Check out also the Docusaurus [here](https://docs-oss.scrapegraphai.com/).
Also check out the Docusaurus documentation [here](https://scrapegraph-doc.onrender.com/).
## 🏆 Sponsors
<div style="text-align: center;">

View File

@ -0,0 +1 @@
ANTHROPIC_API_KEY="YOUR ANTHROPIC API KEY"

View File

@ -0,0 +1,59 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""
import os, json
from typing import List
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv()

# ************************************************
# Output schema handed to the graph
# ************************************************

# One project entry: a title plus its description.
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")


# Wrapper model holding the list of Project entries.
class Projects(BaseModel):
    projects: List[Project]


# ************************************************
# Graph configuration
# ************************************************

# The API key is read from the environment (populated by load_dotenv above).
llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}

iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3,
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Create the CodeGeneratorGraph instance and run it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

print(code_generator_graph.run())

View File

@ -0,0 +1,60 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""
import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
# Load the variables from .env once, before anything reads the environment.
# Required environment variable in .env:
#   ANTHROPIC_API_KEY
# (HUGGINGFACEHUB_API_TOKEN was listed here previously, but this script never reads it.)
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
# Resolve the input relative to this script so it works from any working directory.
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

# NOTE(review): read_csv uses its default "," delimiter here; confirm this
# matches the delimiter actually used by the input file.
text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),  # Pass the content of the file, not the file object
    config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save the result both as CSV and as JSON under the base name "result"
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,54 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""
import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Load the CSV input that sits next to this script
# ************************************************

FILE_NAME = "inputs/username.csv"
file_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)
csv_frame = pd.read_csv(file_path)
# The graph is given the textual rendering of the DataFrame.
csv_text = str(csv_frame)

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the CSVScraperMultiGraph and run it
# ************************************************

# The same document is passed twice to exercise the multi-source graph.
csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Persist the result as both CSV and JSON under the base name "result"
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,94 @@
"""
Example of custom graph using existing nodes
"""
import os
from dotenv import load_dotenv
from langchain_anthropic import ChatAnthropic
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, GenerateAnswerNode, RobotsNode
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "claude-3-haiku-20240307",
    },
}

# ************************************************
# Define the graph nodes
# ************************************************

# ChatAnthropic is a keyword-only (pydantic-style) model: the settings dict
# must be unpacked, passing it positionally raises a TypeError.
llm_model = ChatAnthropic(**graph_config["llm"])

# define the nodes for the graph
# Reads "url" from the state and writes "is_scrapable".
robot_node = RobotsNode(
    input="url",
    output=["is_scrapable"],
    node_config={
        "llm_model": llm_model,
        "force_scraping": True,
        "verbose": True,
    }
)

# Reads "url" or "local_dir" from the state and writes "doc".
fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc"],
    node_config={
        "verbose": True,
        "headless": True,
    }
)

# Reads "doc" from the state and writes "parsed_doc".
parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={
        "chunk_size": 4096,
        "verbose": True,
    }
)

# Reads the user prompt plus one of the document keys and writes "answer".
generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={
        "llm_model": llm_model,
        "verbose": True,
    }
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

graph = BaseGraph(
    nodes=[
        robot_node,
        fetch_node,
        parse_node,
        generate_answer_node,
    ],
    edges=[
        (robot_node, fetch_node),
        (fetch_node, parse_node),
        (parse_node, generate_answer_node)
    ],
    entry_point=robot_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
    "user_prompt": "Describe the content",
    "url": "https://example.com/"
})

# get the answer from the result
result = result.get("answer", "No answer found.")
print(result)

View File

@ -0,0 +1,28 @@
"""
Example of DepthSearchGraph using an Anthropic model
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import DepthSearchGraph
load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "depth": 2,
    "only_inside_links": False,
}

# ************************************************
# Build the DepthSearchGraph and run it
# ************************************************

search_graph = DepthSearchGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io",
    config=graph_config,
)

print(search_graph.run())

View File

@ -0,0 +1,42 @@
"""
document_scraper example
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import DocumentScraperGraph
load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Inline document used as the scraping source
# ************************************************

source = """
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature.
Divided into three major sectionsInferno, Purgatorio, and Paradisothe narrative traces the journey of Dante
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
"""

# ************************************************
# Build the DocumentScraperGraph and run it
# ************************************************

document_scraper_graph = DocumentScraperGraph(
    prompt="Summarize the text and find the main topics",
    source=source,
    config=graph_config,
)

answer = document_scraper_graph.run()
print(json.dumps(answer, indent=4))

View File

@ -0,0 +1,120 @@
<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
<title>Oberon's Legacy</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-03-10</publish_date>
<description>In post-apocalypse England, the mysterious
agent known only as Oberon helps to create a new life
for the inhabitants of London. Sequel to Maeve
Ascendant.</description>
</book>
<book id="bk105">
<author>Corets, Eva</author>
<title>The Sundered Grail</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-09-10</publish_date>
<description>The two daughters of Maeve, half-sisters,
battle one another for control of England. Sequel to
Oberon's Legacy.</description>
</book>
<book id="bk106">
<author>Randall, Cynthia</author>
<title>Lover Birds</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-09-02</publish_date>
<description>When Carla meets Paul at an ornithology
conference, tempers fly as feathers get ruffled.</description>
</book>
<book id="bk107">
<author>Thurman, Paula</author>
<title>Splish Splash</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-11-02</publish_date>
<description>A deep sea diver finds true love twenty
thousand leagues beneath the sea.</description>
</book>
<book id="bk108">
<author>Knorr, Stefan</author>
<title>Creepy Crawlies</title>
<genre>Horror</genre>
<price>4.95</price>
<publish_date>2000-12-06</publish_date>
<description>An anthology of horror stories about roaches,
centipedes, scorpions and other insects.</description>
</book>
<book id="bk109">
<author>Kress, Peter</author>
<title>Paradox Lost</title>
<genre>Science Fiction</genre>
<price>6.95</price>
<publish_date>2000-11-02</publish_date>
<description>After an inadvertant trip through a Heisenberg
Uncertainty Device, James Salway discovers the problems
of being quantum.</description>
</book>
<book id="bk110">
<author>O'Brien, Tim</author>
<title>Microsoft .NET: The Programming Bible</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-09</publish_date>
<description>Microsoft's .NET initiative is explored in
detail in this deep programmer's reference.</description>
</book>
<book id="bk111">
<author>O'Brien, Tim</author>
<title>MSXML3: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-01</publish_date>
<description>The Microsoft MSXML3 parser is covered in
detail, with attention to XML DOM interfaces, XSLT processing,
SAX and more.</description>
</book>
<book id="bk112">
<author>Galos, Mike</author>
<title>Visual Studio 7: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>49.95</price>
<publish_date>2001-04-16</publish_date>
<description>Microsoft Visual Studio 7 is explored in depth,
looking at how Visual Basic, Visual C++, C#, and ASP+ are
integrated into a comprehensive development
environment.</description>
</book>
</catalog>

View File

@ -0,0 +1,182 @@
{
"kind":"youtube#searchListResponse",
"etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg",
"nextPageToken":"CAUQAA",
"regionCode":"NL",
"pageInfo":{
"totalResults":1000000,
"resultsPerPage":5
},
"items":[
{
"kind":"youtube#searchResult",
"etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ",
"id":{
"kind":"youtube#video",
"videoId":"TvWDY4Mm5GM"
},
"snippet":{
"publishedAt":"2023-07-24T14:15:01Z",
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
"title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts",
"description":"",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"FC Motivate",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T14:15:01Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k",
"id":{
"kind":"youtube#video",
"videoId":"aZM_42CcNZ4"
},
"snippet":{
"publishedAt":"2023-07-24T16:09:27Z",
"channelId":"UCM5gMM_HqfKHYIEJ3lstMUA",
"title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 💰",
"description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"John Nellis",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T16:09:27Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"WbBz4oh9I5VaYj91LjeJvffrBVY",
"id":{
"kind":"youtube#video",
"videoId":"wkP3XS3aNAY"
},
"snippet":{
"publishedAt":"2023-07-24T16:00:50Z",
"channelId":"UC4EP1dxFDPup_aFLt0ElsDw",
"title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL",
"description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"Shoot for Love",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T16:00:50Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"juxv_FhT_l4qrR05S1QTrb4CGh8",
"id":{
"kind":"youtube#video",
"videoId":"rJkDZ0WvfT8"
},
"snippet":{
"publishedAt":"2023-07-24T10:00:39Z",
"channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ",
"title":"TOP 10 DEFENDERS 2023",
"description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! • Instagram ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"Home of Football",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T10:00:39Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"wtuknXTmI1txoULeH3aWaOuXOow",
"id":{
"kind":"youtube#video",
"videoId":"XH0rtu4U6SE"
},
"snippet":{
"publishedAt":"2023-07-21T16:30:05Z",
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
"title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts",
"description":"",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"FC Motivate",
"liveBroadcastContent":"none",
"publishTime":"2023-07-21T16:30:05Z"
}
}
]
}

View File

@ -0,0 +1,105 @@
<body class="fixed-top-nav " style="padding-top: 57px;">
<header>
<nav id="navbar" class="navbar navbar-light navbar-expand-sm fixed-top">
<div class="container">
<a class="navbar-brand title font-weight-lighter" href="/"><span class="font-weight-bold">Marco&nbsp;</span>Perini</a> <button class="navbar-toggler collapsed ml-auto" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar top-bar"></span> <span class="icon-bar middle-bar"></span> <span class="icon-bar bottom-bar"></span> </button>
<div class="collapse navbar-collapse text-right" id="navbarNav">
<ul class="navbar-nav ml-auto flex-nowrap">
<li class="nav-item "> <a class="nav-link" href="/">About</a> </li>
<li class="nav-item dropdown active">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Projects<span class="sr-only">(current)</span></a>
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdown">
<a class="dropdown-item" href="/projects/">Projects</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="/competitions/">Competitions</a>
</div>
</li>
<li class="nav-item "> <a class="nav-link" href="/cv/">CV</a> </li>
<li class="toggle-container"> <button id="light-toggle" title="Change theme"> <i class="fa-solid fa-moon"></i> <i class="fa-solid fa-sun"></i> </button> </li>
</ul>
</div>
</div>
</nav>
<progress id="progress" value="0" max="284" style="top: 57px;">
<div class="progress-container"> <span class="progress-bar"></span> </div>
</progress>
</header>
<div class="container mt-5">
<div class="post">
<header class="post-header">
<h1 class="post-title">Projects</h1>
<p class="post-description"></p>
</header>
<article>
<div class="projects">
<div class="grid" style="position: relative; height: 861.992px;">
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 0px; top: 0px;">
<a href="/projects/rotary-pendulum-rl/">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/rotary_pybullet.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Rotary Pendulum RL</h4>
<p class="card-text">Open Source project aimed at controlling a real life rotary pendulum using RL algorithms</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 260px; top: 0px;">
<a href="https://github.com/PeriniM/DQN-SwingUp" rel="external nofollow noopener" target="_blank">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/value-policy-heatmaps.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">DQN Implementation from scratch</h4>
<p class="card-text">Developed a Deep Q-Network algorithm to train a simple and double pendulum</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 0px; top: 447.414px;">
<a href="https://github.com/PeriniM/Multi-Agents-HAED" rel="external nofollow noopener" target="_blank">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/multi_agents_haed.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Multi Agents HAED</h4>
<p class="card-text">University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 260px; top: 370.172px;">
<a href="/projects/wireless-esc-drone/">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/wireless_esc.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Wireless ESC for Modular Drones</h4>
<p class="card-text">Modular drone architecture proposal and proof of concept. The project received maximum grade.</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
</div>
</div>
</article>
</div>
</div>
<footer class="fixed-bottom">
<div class="container mt-0"> © Copyright 2023 Marco Perini. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div>
</footer>
<div class="hiddendiv common"></div>
</body>

View File

@ -0,0 +1,7 @@
Username; Identifier;First name;Last name
booker12;9012;Rachel;Booker
grey07;2070;Laura;Grey
johnson81;4081;Craig;Johnson
jenkins46;9346;Mary;Jenkins
smith79;5079;Jamie;Smith
1 Username Identifier First name Last name
2 booker12 9012 Rachel Booker
3 grey07 2070 Laura Grey
4 johnson81 4081 Craig Johnson
5 jenkins46 9346 Mary Jenkins
6 smith79 5079 Jamie Smith

View File

@ -0,0 +1,43 @@
"""
Basic example of scraping pipeline using JSONScraperGraph from JSON documents
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import JSONScraperGraph
load_dotenv()

# ************************************************
# Load the JSON document that sits next to this script
# ************************************************

FILE_NAME = "inputs/example.json"
file_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

with open(file_path, encoding="utf-8") as json_file:
    text = json_file.read()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the JSONScraperGraph and run it
# ************************************************

json_scraper_graph = JSONScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=text,  # the file's content, not the file object
    config=graph_config,
)

print(json_scraper_graph.run())

View File

@ -0,0 +1,35 @@
"""
Module showing how JSONScraperMultiGraph works
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import JSONScraperMultiGraph
load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Load the JSON document that sits next to this script
# ************************************************

FILE_NAME = "inputs/example.json"
file_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

with open(file_path, encoding="utf-8") as json_file:
    text = json_file.read()

# The same document is supplied twice to exercise the multi-source graph.
sources = [text, text]

# ************************************************
# Build the JSONScraperMultiGraph and run it
# ************************************************

multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

answer = multiple_search_graph.run()
print(json.dumps(answer, indent=4))

View File

@ -0,0 +1,47 @@
"""
Basic example of scraping pipeline using SmartScraper while setting an API rate limit.
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# Required environment variable in .env:
#   ANTHROPIC_API_KEY
load_dotenv()

# ************************************************
# Graph configuration (LLM settings include a rate limit)
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
    "rate_limit": {
        "requests_per_second": 1
    },
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the SmartScraperGraph and run it
# ************************************************

# Kept as a single literal so the exact prompt text is preserved.
events_prompt = """Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link"""

smart_scraper_graph = SmartScraperGraph(
    prompt=events_prompt,
    # also accepts a string with the already downloaded HTML code
    source="https://www.hmhco.com/event",
    config=graph_config,
)

print(smart_scraper_graph.run())

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,52 @@
"""
Basic example of scraping pipeline using SmartScraper from text
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()

# ************************************************
# Load the HTML text that sits next to this script
# ************************************************

FILE_NAME = "inputs/plain_html_example.txt"
file_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

# The text could equally come from an HTTP request instead of a local file.
with open(file_path, encoding="utf-8") as html_file:
    text = html_file.read()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the SmartScraperGraph and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    source=text,
    config=graph_config,
)

print(smart_scraper_graph.run())

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,42 @@
"""
Basic example of scraping pipeline using ScriptCreatorGraph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the ScriptCreatorGraph and run it
# ************************************************

script_creator_graph = ScriptCreatorGraph(
    prompt="List me all the projects with their description.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects",
    config=graph_config,
)

print(script_creator_graph.run())

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(script_creator_graph.get_execution_info()))

View File

@ -0,0 +1,51 @@
"""
Basic example of scraping pipeline using ScriptCreatorMultiGraph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorMultiGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()

# ************************************************
# Graph configuration: LLM credentials/model and
# the library the generated script should rely on
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings, "library": "beautifulsoup"}

# ************************************************
# Pages the multi graph will work on
# ************************************************

urls = [
    "https://schultzbergagency.com/emil-raste-karlsen/",
    "https://schultzbergagency.com/johanna-hedberg/",
]

# ************************************************
# Build the ScriptCreatorMultiGraph and execute it
# ************************************************

script_creator_graph = ScriptCreatorMultiGraph(
    prompt="Find information about actors",
    # here the source is the list of URLs defined above
    source=urls,
    config=graph_config,
)

result = script_creator_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(script_creator_graph.get_execution_info()))

View File

@ -0,0 +1,43 @@
"""
Example of Search Graph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Graph configuration (Anthropic model + API key)
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the SearchGraph and execute it
# ************************************************

search_graph = SearchGraph(
    prompt="List me Chioggia's famous dishes",
    config=graph_config,
)

result = search_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(search_graph.get_execution_info()))

# Persist the answer twice: as result.csv-style and result.json-style output
# (both helpers receive the same base name "result")
for export in (convert_to_csv, convert_to_json):
    export(result, "result")

View File

@ -0,0 +1,44 @@
"""
Example of Search Graph
"""
import os
from typing import List
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from scrapegraphai.graphs import SearchGraph
load_dotenv()

# ************************************************
# Output schema handed to the graph
# ************************************************


# A single dish: its name plus a free-text description.
class Dish(BaseModel):
    name: str = Field(description="The name of the dish")
    description: str = Field(description="The description of the dish")


# Container model: the graph is asked to answer with a list of Dish entries.
class Dishes(BaseModel):
    dishes: List[Dish]


# ************************************************
# Graph configuration (Anthropic model + API key)
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the SearchGraph with the schema and run it
# ************************************************

search_graph = SearchGraph(
    prompt="List me Chioggia's famous dishes",
    config=graph_config,
    schema=Dishes,
)

print(search_graph.run())

View File

@ -0,0 +1,45 @@
"""
Example of Search Graph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
# ************************************************
# Define the configuration for the graph
# ************************************************

# NOTE(review): AzureChatOpenAI and AzureOpenAIEmbeddings are imported at the
# top of this script but never used below; this example only talks to Anthropic.
load_dotenv()

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
}

# ************************************************
# Create the SearchGraph instance and run it
# ************************************************

search_graph = SearchGraph(
    # spelling matters here: the prompt is what the graph searches for
    prompt="List me the best excursions near Trento",
    config=graph_config,
)

result = search_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json and csv
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,41 @@
"""
Basic example of scraping pipeline using SmartScraperGraph with an Anthropic API key
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()

# ************************************************
# Graph configuration (Anthropic model + API key)
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the SmartScraperGraph and execute it
# ************************************************

# The prompt spans several lines; its text is passed to the graph verbatim.
smart_scraper_graph = SmartScraperGraph(
    prompt="""Don't say anything else. Output JSON only. List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link""",
    # the source may also be a string holding already downloaded HTML
    source="https://www.hmhco.com/event",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,32 @@
"""
Basic example of scraping pipeline using SmartScraperLiteGraph
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperLiteGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()

# Graph configuration: Anthropic model + key, verbose output, and
# "headless" switched off.
llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# Build the lite scraper for a single page and run it
smart_scraper_lite_graph = SmartScraperLiteGraph(
    prompt="Who is Marco Perini?",
    source="https://perinim.github.io/",
    config=graph_config,
)
result = smart_scraper_lite_graph.run()

# Pretty-print the answer as JSON, then the execution statistics
print(json.dumps(result, indent=4))
print(prettify_exec_info(smart_scraper_lite_graph.get_execution_info()))

View File

@ -0,0 +1,56 @@
"""
Basic example of scraping pipeline using SmartScraperMultiGraph
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiGraph
# Load the .env file once; the key is read right below via os.getenv
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "api_key": os.getenv("ANTHROPIC_API_KEY"),
        "model": "anthropic/claude-3-haiku-20240307",
    },
}

# *******************************************************
# Create the SmartScraperMultiGraph instance and run it
# *******************************************************

# The graph is built and run exactly once: running the identical request a
# second time would only repeat the scraping and the LLM calls.
multiple_search_graph = SmartScraperMultiGraph(
    prompt="Who is Marco Perini?",
    source=[
        "https://perinim.github.io/",
        "https://perinim.github.io/cv/",
    ],
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,38 @@
"""
Basic example of scraping pipeline using SmartScraperMultiConcatGraph
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiConcatGraph
load_dotenv()

# ************************************************
# Graph configuration (Anthropic model + API key)
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# *************************************************************
# Build the SmartScraperMultiConcatGraph instance and run it
# *************************************************************

pages = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

multiple_search_graph = SmartScraperMultiConcatGraph(
    prompt="Who is Marco Perini?",
    source=pages,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,35 @@
"""
Basic example of scraping pipeline using SmartScraperMultiLiteGraph
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()

# Graph configuration: Anthropic model + key, verbose output, and
# "headless" switched off.
llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# Pages handed to the multi-page lite scraper
pages = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=pages,
    config=graph_config,
)
result = smart_scraper_multi_lite_graph.run()

# Pretty-print the answer as JSON, then the execution statistics
print(json.dumps(result, indent=4))
print(prettify_exec_info(smart_scraper_multi_lite_graph.get_execution_info()))

View File

@ -0,0 +1,51 @@
"""
Basic example of scraping pipeline using SmartScraperGraph with an output schema
"""
import os
from typing import List
from pydantic import BaseModel, Field
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()

# ************************************************
# Output schema handed to the graph
# ************************************************


# A single project: its title plus a free-text description.
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")


# Container model: the graph is asked to answer with a list of Project entries.
class Projects(BaseModel):
    projects: List[Project]


# ************************************************
# Graph configuration (Anthropic model + API key)
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the SmartScraperGraph and execute it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    # the source may also be a string holding already downloaded HTML
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,55 @@
"""
Basic example of scraping pipeline using XMLScraperGraph from XML documents
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Load the XML document that sits next to this script
# ************************************************

FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
xml_path = os.path.join(script_dir, FILE_NAME)

with open(xml_path, encoding="utf-8") as xml_file:
    text = xml_file.read()

# ************************************************
# Graph configuration (Anthropic model + API key)
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the XMLScraperGraph and execute it
# ************************************************

xml_scraper_graph = XMLScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=text,  # the XML text itself, not the open file handle
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Export the answer with both helpers, using the base name "result"
for export in (convert_to_csv, convert_to_json):
    export(result, "result")

View File

@ -0,0 +1,55 @@
"""
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Load the XML document that sits next to this script
# ************************************************

FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
xml_path = os.path.join(script_dir, FILE_NAME)

with open(xml_path, encoding="utf-8") as xml_file:
    text = xml_file.read()

# ************************************************
# Graph configuration (Anthropic model + API key)
# ************************************************

llm_settings = {
    "api_key": os.getenv("ANTHROPIC_API_KEY"),
    "model": "anthropic/claude-3-haiku-20240307",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the XMLScraperMultiGraph and execute it
# ************************************************

# The same document is supplied twice to exercise the multi-source graph.
documents = [text, text]

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=documents,  # XML text of each document, not file handles
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Export the answer with both helpers, using the base name "result"
for export in (convert_to_csv, convert_to_json):
    export(result, "result")

View File

@ -0,0 +1,57 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""
import os
from typing import List
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph
load_dotenv()

# ************************************************
# Output schema handed to the graph
# ************************************************


# A single project: its title plus a free-text description.
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")


# Container model: the graph is asked to answer with a list of Project entries.
class Projects(BaseModel):
    projects: List[Project]


# ************************************************
# Graph configuration
# ************************************************

# os.environ[...] raises KeyError when AZURE_OPENAI_KEY is not set
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}

# Iteration limits passed to the generator, keyed by phase name
iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3,
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Build the CodeGeneratorGraph and execute it
# ************************************************

code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

print(code_generator_graph.run())

View File

@ -0,0 +1,56 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""
import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Load the CSV file that sits next to this script
# ************************************************

FILE_NAME = "inputs/username.csv"
script_dir = os.path.dirname(os.path.realpath(__file__))
csv_path = os.path.join(script_dir, FILE_NAME)

# Parsed with pandas' default settings into a DataFrame
text = pd.read_csv(csv_path)

# ************************************************
# Graph configuration
# ************************************************

# os.environ[...] raises KeyError when AZURE_OPENAI_KEY is not set
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the CSVScraperGraph and execute it
# ************************************************

# The graph receives the DataFrame's printed representation.
# NOTE(review): pandas abbreviates that representation for large frames -
# confirm this is acceptable for inputs bigger than the sample file.
csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Export the answer with both helpers, using the base name "result"
for export in (convert_to_csv, convert_to_json):
    export(result, "result")

View File

@ -0,0 +1,55 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""
import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Load the CSV file that sits next to this script
# ************************************************

FILE_NAME = "inputs/username.csv"
script_dir = os.path.dirname(os.path.realpath(__file__))
csv_path = os.path.join(script_dir, FILE_NAME)

# Parsed with pandas' default settings into a DataFrame
text = pd.read_csv(csv_path)

# ************************************************
# Graph configuration
# ************************************************

# os.environ[...] raises KeyError when AZURE_OPENAI_KEY is not set
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the CSVScraperMultiGraph and execute it
# ************************************************

# The printed form of the same DataFrame is supplied twice to exercise the
# multi-source graph.
table_as_text = str(text)

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[table_as_text, table_as_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Export the answer with both helpers, using the base name "result"
for export in (convert_to_csv, convert_to_json):
    export(result, "result")

View File

@ -0,0 +1,28 @@
"""
depth_search_graph_azure example
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import DepthSearchGraph
load_dotenv()

# Graph configuration.
# os.environ[...] raises KeyError when AZURE_OPENAI_KEY is not set.
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "depth": 2,
    "only_inside_links": False,
}

# Build the DepthSearchGraph for the start page and run it
search_graph = DepthSearchGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io",
    config=graph_config,
)

print(search_graph.run())

View File

@ -0,0 +1,44 @@
"""
document_scraper example
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import DocumentScraperGraph
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        # os.environ[...] raises KeyError when AZURE_OPENAI_KEY is not set
        "api_key": os.environ["AZURE_OPENAI_KEY"],
        "model": "azure_openai/gpt-4o",
    },
    "verbose": True,
    "headless": False,
}

# Sample document passed to the graph as plain text. The apostrophe and the
# two dashes that had been lost from this passage are restored so the model
# receives well-formed sentences.
source = """
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
"""

# ************************************************
# Create the DocumentScraperGraph instance and run it
# ************************************************

document_scraper_graph = DocumentScraperGraph(
    prompt="Summarize the text and find the main topics",
    source=source,
    config=graph_config,
)

result = document_scraper_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,120 @@
<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
<title>Oberon's Legacy</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-03-10</publish_date>
<description>In post-apocalypse England, the mysterious
agent known only as Oberon helps to create a new life
for the inhabitants of London. Sequel to Maeve
Ascendant.</description>
</book>
<book id="bk105">
<author>Corets, Eva</author>
<title>The Sundered Grail</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-09-10</publish_date>
<description>The two daughters of Maeve, half-sisters,
battle one another for control of England. Sequel to
Oberon's Legacy.</description>
</book>
<book id="bk106">
<author>Randall, Cynthia</author>
<title>Lover Birds</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-09-02</publish_date>
<description>When Carla meets Paul at an ornithology
conference, tempers fly as feathers get ruffled.</description>
</book>
<book id="bk107">
<author>Thurman, Paula</author>
<title>Splish Splash</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-11-02</publish_date>
<description>A deep sea diver finds true love twenty
thousand leagues beneath the sea.</description>
</book>
<book id="bk108">
<author>Knorr, Stefan</author>
<title>Creepy Crawlies</title>
<genre>Horror</genre>
<price>4.95</price>
<publish_date>2000-12-06</publish_date>
<description>An anthology of horror stories about roaches,
centipedes, scorpions and other insects.</description>
</book>
<book id="bk109">
<author>Kress, Peter</author>
<title>Paradox Lost</title>
<genre>Science Fiction</genre>
<price>6.95</price>
<publish_date>2000-11-02</publish_date>
<description>After an inadvertant trip through a Heisenberg
Uncertainty Device, James Salway discovers the problems
of being quantum.</description>
</book>
<book id="bk110">
<author>O'Brien, Tim</author>
<title>Microsoft .NET: The Programming Bible</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-09</publish_date>
<description>Microsoft's .NET initiative is explored in
detail in this deep programmer's reference.</description>
</book>
<book id="bk111">
<author>O'Brien, Tim</author>
<title>MSXML3: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-01</publish_date>
<description>The Microsoft MSXML3 parser is covered in
detail, with attention to XML DOM interfaces, XSLT processing,
SAX and more.</description>
</book>
<book id="bk112">
<author>Galos, Mike</author>
<title>Visual Studio 7: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>49.95</price>
<publish_date>2001-04-16</publish_date>
<description>Microsoft Visual Studio 7 is explored in depth,
looking at how Visual Basic, Visual C++, C#, and ASP+ are
integrated into a comprehensive development
environment.</description>
</book>
</catalog>

View File

@ -0,0 +1,182 @@
{
"kind":"youtube#searchListResponse",
"etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg",
"nextPageToken":"CAUQAA",
"regionCode":"NL",
"pageInfo":{
"totalResults":1000000,
"resultsPerPage":5
},
"items":[
{
"kind":"youtube#searchResult",
"etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ",
"id":{
"kind":"youtube#video",
"videoId":"TvWDY4Mm5GM"
},
"snippet":{
"publishedAt":"2023-07-24T14:15:01Z",
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
"title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts",
"description":"",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"FC Motivate",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T14:15:01Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k",
"id":{
"kind":"youtube#video",
"videoId":"aZM_42CcNZ4"
},
"snippet":{
"publishedAt":"2023-07-24T16:09:27Z",
"channelId":"UCM5gMM_HqfKHYIEJ3lstMUA",
"title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 💰",
"description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"John Nellis",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T16:09:27Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"WbBz4oh9I5VaYj91LjeJvffrBVY",
"id":{
"kind":"youtube#video",
"videoId":"wkP3XS3aNAY"
},
"snippet":{
"publishedAt":"2023-07-24T16:00:50Z",
"channelId":"UC4EP1dxFDPup_aFLt0ElsDw",
"title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL",
"description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"Shoot for Love",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T16:00:50Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"juxv_FhT_l4qrR05S1QTrb4CGh8",
"id":{
"kind":"youtube#video",
"videoId":"rJkDZ0WvfT8"
},
"snippet":{
"publishedAt":"2023-07-24T10:00:39Z",
"channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ",
"title":"TOP 10 DEFENDERS 2023",
"description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! • Instagram ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"Home of Football",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T10:00:39Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"wtuknXTmI1txoULeH3aWaOuXOow",
"id":{
"kind":"youtube#video",
"videoId":"XH0rtu4U6SE"
},
"snippet":{
"publishedAt":"2023-07-21T16:30:05Z",
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
"title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts",
"description":"",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"FC Motivate",
"liveBroadcastContent":"none",
"publishTime":"2023-07-21T16:30:05Z"
}
}
]
}

View File

@ -0,0 +1,7 @@
Username; Identifier;First name;Last name
booker12;9012;Rachel;Booker
grey07;2070;Laura;Grey
johnson81;4081;Craig;Johnson
jenkins46;9346;Mary;Jenkins
smith79;5079;Jamie;Smith
1 Username Identifier First name Last name
2 booker12 9012 Rachel Booker
3 grey07 2070 Laura Grey
4 johnson81 4081 Craig Johnson
5 jenkins46 9346 Mary Jenkins
6 smith79 5079 Jamie Smith

View File

@ -0,0 +1,45 @@
"""
Basic example of scraping pipeline using JSONScraperGraph with an Azure OpenAI key
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import JSONScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()

# ************************************************
# Load the JSON document that sits next to this script
# ************************************************

FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))
json_path = os.path.join(script_dir, FILE_NAME)

with open(json_path, encoding="utf-8") as json_file:
    text = json_file.read()

# ************************************************
# Graph configuration
# ************************************************

# os.environ[...] raises KeyError when AZURE_OPENAI_KEY is not set
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the JSONScraperGraph and execute it
# ************************************************

# NOTE(review): the prompt asks about books - confirm that it matches what
# inputs/example.json actually contains.
json_scraper_graph = JSONScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=text,  # the JSON text itself, not the open file handle
    config=graph_config,
)

result = json_scraper_graph.run()
print(result)

# ************************************************
# Print the execution statistics of the graph
# ************************************************

print(prettify_exec_info(json_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,38 @@
"""
Module for showing how JSONScraperMultiGraph multi works
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import JSONScraperMultiGraph
load_dotenv()

# Graph configuration.
# os.environ[...] raises KeyError when AZURE_OPENAI_KEY is not set.
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# Read the JSON document stored next to this script
FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))
json_path = os.path.join(script_dir, FILE_NAME)

with open(json_path, encoding="utf-8") as json_file:
    text = json_file.read()

# The same document is supplied twice to exercise the multi-source graph
sources = [text, text]

multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,56 @@
"""
Basic example of scraping pipeline using SmartScraper with a custom rate limit
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# Environment variables listed as required in .env:
#   AZURE_OPENAI_ENDPOINT
#   AZURE_OPENAI_CHAT_DEPLOYMENT_NAME
#   MODEL_NAME
#   AZURE_OPENAI_API_KEY
#   OPENAI_API_TYPE
#   AZURE_OPENAI_API_VERSION
#   AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME
# NOTE: the configuration below reads AZURE_OPENAI_KEY directly from the
# environment, so that variable must be set as well.
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************
# The custom rate limit: at most one request per second
rate_limit = {"requests_per_second": 1}

graph_config = {
    "llm": {
        "api_key": os.environ["AZURE_OPENAI_KEY"],
        "model": "azure_openai/gpt-4o",
        "rate_limit": rate_limit,
    },
    "verbose": True,
    "headless": False,
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************
EVENTS_PROMPT = """List me all the events, with the following fields: company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link"""

smart_scraper_graph = SmartScraperGraph(
    prompt=EVENTS_PROMPT,
    # a string holding already-downloaded HTML is accepted here too
    source="https://www.hmhco.com/event",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,54 @@
"""
Basic example of scraping pipeline using SmartScraper from text
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Load the HTML text stored next to this script
# ************************************************
FILE_NAME = "inputs/plain_html_example.txt"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

# Any other way of obtaining the text (e.g. an HTTP request) would work as well
with open(file_path, "r", encoding="utf-8") as file:
    text = file.read()

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the SmartScraperGraph on the raw text and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    source=text,
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,43 @@
"""
Basic example of scraping pipeline using ScriptCreatorGraph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************
graph_config = {
    "llm": {
        "api_key": os.environ["AZURE_OPENAI_KEY"],
        "model": "azure_openai/gpt-4o",
    },
    # ScriptCreatorGraph reads config["library"] to know which scraping
    # library the generated script should target; omitting it raises KeyError.
    "library": "beautifulsoup",
    "verbose": True,
    "headless": False,
}

# ************************************************
# Create the ScriptCreatorGraph instance and run it
# ************************************************
script_creator_graph = ScriptCreatorGraph(
    prompt="List me all the projects with their description.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects",
    config=graph_config,
)

result = script_creator_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = script_creator_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,52 @@
"""
Basic example of scraping pipeline using ScriptCreatorMultiGraph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorMultiGraph
from scrapegraphai.utils import prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************
graph_config = {
    "llm": {
        "api_key": os.environ["AZURE_OPENAI_KEY"],
        "model": "azure_openai/gpt-4o",
    },
    # The script-creator graphs read config["library"] to know which scraping
    # library the generated script should target; omitting it raises KeyError.
    "library": "beautifulsoup",
    "verbose": True,
    "headless": False,
}

# ************************************************
# Pages the generated script should cover
# ************************************************
urls = [
    "https://schultzbergagency.com/emil-raste-karlsen/",
    "https://schultzbergagency.com/johanna-hedberg/",
]

# ************************************************
# Create the ScriptCreatorMultiGraph instance and run it
# ************************************************
script_creator_graph = ScriptCreatorMultiGraph(
    prompt="Find information about actors",
    # one entry per page to process
    source=urls,
    config=graph_config,
)

result = script_creator_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = script_creator_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,51 @@
"""
Example of Search Graph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# NOTE: an earlier revision read "inputs/example.json" into an unused variable
# here. SearchGraph takes no source document, so that read was dead code that
# could only fail with FileNotFoundError; it has been removed.

# ************************************************
# Initialize the model instances
# ************************************************
graph_config = {
    "llm": {
        "api_key": os.environ["AZURE_OPENAI_KEY"],
        "model": "azure_openai/gpt-4o",
    },
    "verbose": True,
    "headless": False,
}

# ************************************************
# Create the SearchGraph instance and run it
# ************************************************
search_graph = SearchGraph(
    prompt="List me the best escursions near Trento",
    config=graph_config,
)

result = search_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************
graph_exec_info = search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json and csv (both helpers receive the base file name "result")
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,60 @@
"""
Example of Search Graph
"""
import os
from typing import List
from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
from pydantic import BaseModel, Field
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()


# ************************************************
# Output schema handed to the graph
# ************************************************
# One dish: its name plus a free-text description.
class Dish(BaseModel):
    name: str = Field(description="The name of the dish")
    description: str = Field(description="The description of the dish")


# Top-level answer shape: a list of Dish entries.
class Dishes(BaseModel):
    dishes: List[Dish]


# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the SearchGraph with the schema and run it
# ************************************************
search_graph = SearchGraph(
    prompt="List me Chioggia's famous dishes",
    config=graph_config,
    schema=Dishes,
)

result = search_graph.run()
print(result)

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Persist the answer in both formats under the base name "result"
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,45 @@
"""
Example of Search Graph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
# ************************************************
# Configuration for the graph
# ************************************************
# Environment variables must be loaded before AZURE_OPENAI_KEY is read below
load_dotenv()

llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the SearchGraph and run it
# ************************************************
search_graph = SearchGraph(
    prompt="List me the best escursions near Trento",
    config=graph_config,
)

result = search_graph.run()
print(result)

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Persist the answer in both formats under the base name "result"
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,44 @@
"""
Basic example of scraping pipeline using SmartScraper using Azure OpenAI Key
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the SmartScraperGraph and run it
# ************************************************
EVENTS_PROMPT = """List me all the events, with the following fields:
company_name, event_name, event_start_date, event_start_time,
event_end_date, event_end_time, location, event_mode, event_category,
third_party_redirect, no_of_days,
time_in_hours, hosted_or_attending, refreshments_type,
registration_available, registration_link"""

smart_scraper_graph = SmartScraperGraph(
    prompt=EVENTS_PROMPT,
    # a string holding already-downloaded HTML is accepted here too
    source="https://www.hmhco.com/event",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,31 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperLiteGraph
from scrapegraphai.utils import prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the SmartScraperLiteGraph and run it
# ************************************************
smart_scraper_lite_graph = SmartScraperLiteGraph(
    prompt="Who is Marco Perini?",
    source="https://perinim.github.io/",
    config=graph_config,
)

result = smart_scraper_lite_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = smart_scraper_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,38 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiGraph
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# *******************************************************
# Build the SmartScraperMultiGraph over two pages and run it
# *******************************************************
page_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

multiple_search_graph = SmartScraperMultiGraph(
    prompt="Who is Marco Perini?",
    source=page_urls,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,38 @@
"""
Basic example of scraping pipeline using SmartScraperMultiConcatGraph
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiConcatGraph
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# *******************************************************
# Build the SmartScraperMultiConcatGraph over two pages and run it
# *******************************************************
page_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

multiple_search_graph = SmartScraperMultiConcatGraph(
    prompt="Who is Marco Perini?",
    source=page_urls,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,35 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the SmartScraperMultiLiteGraph over two pages and run it
# ************************************************
page_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=page_urls,
    config=graph_config,
)

result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,49 @@
"""
Basic example of scraping pipeline using SmartScraper with schema
"""
import os
import json
from typing import List
from pydantic import BaseModel, Field
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()


# ************************************************
# Output schema handed to the graph
# ************************************************
# One project: its title plus a free-text description.
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")


# Top-level answer shape: a list of Project entries.
class Projects(BaseModel):
    projects: List[Project]


# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the SmartScraperGraph with the schema and run it
# ************************************************
smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,46 @@
"""
Basic example of scraping pipeline using XMLScraperGraph with an Azure OpenAI key
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperGraph
from scrapegraphai.utils import prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Load the XML document stored next to this script
# ************************************************
FILE_NAME = "inputs/books.xml"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

with open(file_path, "r", encoding="utf-8") as file:
    text = file.read()

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the XMLScraperGraph and run it
# ************************************************
smart_scraper_graph = XMLScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=text,  # the XML text itself, not the open file handle
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = smart_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,56 @@
"""
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
# Pull environment variables (AZURE_OPENAI_KEY is read below) from a .env file
load_dotenv()

# ************************************************
# Load the XML document stored next to this script
# ************************************************
FILE_NAME = "inputs/books.xml"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

with open(file_path, "r", encoding="utf-8") as file:
    text = file.read()

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "api_key": os.environ["AZURE_OPENAI_KEY"],
    "model": "azure_openai/gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# ************************************************
# Build the XMLScraperMultiGraph and run it
# ************************************************
# The same XML text is submitted twice to exercise the multi-source graph
xml_documents = [text] * 2

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=xml_documents,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Persist the answer in both formats under the base name "result"
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,4 @@
AWS_ACCESS_KEY_ID="..."
AWS_SECRET_ACCESS_KEY="..."
AWS_SESSION_TOKEN="..."
AWS_DEFAULT_REGION="..."

View File

@ -0,0 +1,3 @@
This folder contains examples of how to use ScrapeGraphAI with [Amazon Bedrock](https://aws.amazon.com/bedrock/) ⛰️. The examples show how to extract information from websites and files using a natural language prompt.
![](scrapegraphai_bedrock.png)

View File

@ -0,0 +1,60 @@
"""
Basic example of scraping pipeline using Code Generator with schema
"""
import os, json
from typing import List
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from scrapegraphai.graphs import CodeGeneratorGraph
# Pull environment variables from a .env file
load_dotenv()


# ************************************************
# Output schema handed to the graph
# ************************************************
# One project: its title plus a free-text description.
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")


# Top-level answer shape: a list of Project entries.
class Projects(BaseModel):
    projects: List[Project]


# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}

# Iteration caps: one overall limit plus one per named phase
iteration_limits = {
    "overall": 10,
    "syntax": 3,
    "execution": 3,
    "validation": 3,
    "semantic": 3,
}

graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "reduction": 2,
    "max_iterations": iteration_limits,
    "output_file_name": "extracted_data.py",
}

# ************************************************
# Build the CodeGeneratorGraph with the schema and run it
# ************************************************
code_generator_graph = CodeGeneratorGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)

result = code_generator_graph.run()
print(result)

View File

@ -0,0 +1,50 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""
import os
import json
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import prettify_exec_info
# Pull environment variables from a .env file
load_dotenv()

# ************************************************
# Load the CSV file stored next to this script
# ************************************************
FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

# pandas parses the file into a DataFrame
text = pd.read_csv(file_path)

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the CSVScraperGraph and run it
# ************************************************
csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),  # the DataFrame rendered as text, not the file object
    config=graph_config,
)

result = csv_scraper_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,56 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""
import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
# Pull environment variables from a .env file
load_dotenv()

# ************************************************
# Load the CSV file stored next to this script
# ************************************************
FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

# pandas parses the file into a DataFrame
text = pd.read_csv(file_path)

# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the CSVScraperMultiGraph and run it
# ************************************************
# The DataFrame is rendered as text once per source entry
csv_documents = [str(text), str(text)]

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=csv_documents,
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Report how the graph execution went
# ************************************************
graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Persist the answer in both formats under the base name "result"
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,125 @@
"""
Example of custom graph using existing nodes
"""
import json
from dotenv import load_dotenv
from langchain_aws import BedrockEmbeddings
from scrapegraphai.models import Bedrock
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import (
FetchNode,
ParseNode,
RAGNode,
GenerateAnswerNode,
RobotsNode
)
# Pull environment variables from a .env file
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************
graph_config = {
    "llm": {
        "client": "client_name",
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0,
    },
    # The embedder below reads graph_config["embeddings"]["model"]; without
    # this entry the script fails with KeyError before any node is built.
    "embeddings": {
        "model": "bedrock/cohere.embed-multilingual-v3",
    },
}

# ************************************************
# Define the graph nodes
# ************************************************
# Both model ids are configured as "bedrock/<model_id>"; only the part after
# the last "/" is passed on to the model classes.
llm_model = Bedrock({
    'model_id': graph_config["llm"]["model"].split("/")[-1],
    'model_kwargs': {
        'temperature': 0.0,
    },
})

embedder = BedrockEmbeddings(model_id=graph_config["embeddings"]["model"].split("/")[-1])
# Node definitions. Each node names the state keys it consumes (input
# expression) and the key it produces (output), plus its own settings.

# Produces "is_scrapable" from the target URL
robots_settings = {
    "llm_model": llm_model,
    "force_scraping": True,
    "verbose": True,
}
robot_node = RobotsNode(
    input="url",
    output=["is_scrapable"],
    node_config=robots_settings,
)

# Produces "doc" from either a URL or a local directory
fetch_settings = {
    "verbose": True,
    "headless": True,
}
fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc"],
    node_config=fetch_settings,
)

# Produces "parsed_doc" from the fetched document
parse_settings = {
    "chunk_size": 4096,
    "verbose": True,
}
parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config=parse_settings,
)

# Produces "relevant_chunks" from the prompt and the (parsed) document
rag_settings = {
    "llm_model": llm_model,
    "embedder_model": embedder,
    "verbose": True,
}
rag_node = RAGNode(
    input="user_prompt & (parsed_doc | doc)",
    output=["relevant_chunks"],
    node_config=rag_settings,
)

# Produces the final "answer"
answer_settings = {
    "llm_model": llm_model,
    "verbose": True,
}
generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config=answer_settings,
)
# ************************************************
# Wire the nodes into a linear pipeline
# ************************************************
pipeline_nodes = [
    robot_node,
    fetch_node,
    parse_node,
    rag_node,
    generate_answer_node,
]
pipeline_edges = [
    (robot_node, fetch_node),
    (fetch_node, parse_node),
    (parse_node, rag_node),
    (rag_node, generate_answer_node),
]

graph = BaseGraph(
    nodes=pipeline_nodes,
    edges=pipeline_edges,
    entry_point=robot_node,
)

# ************************************************
# Run the graph on the initial state
# ************************************************
initial_state = {
    "user_prompt": "List me all the articles",
    "url": "https://perinim.github.io/projects",
}
final_state, execution_info = graph.execute(initial_state)

# Pull the answer out of the final state, with a fallback message if absent
result = final_state.get("answer", "No answer found.")
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,25 @@
"""
depth_search_graph Bedrock example
"""
from scrapegraphai.graphs import DepthSearchGraph
# ************************************************
# Configuration for the graph
# ************************************************
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
    "depth": 2,
    "only_inside_links": False,
}

# ************************************************
# Build the DepthSearchGraph and run it
# ************************************************
search_graph = DepthSearchGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io",
    config=graph_config,
)

result = search_graph.run()
print(result)

View File

@ -0,0 +1,42 @@
"""
document_scraper example
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import DocumentScraperGraph
# Pull environment variables from a .env file
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************
graph_config = {
    "llm": {
        "client": "client_name",
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0,
    }
}

# Text to analyse. The apostrophe and the two dashes had been stripped from
# this passage (it read "world s" and "sectionsInferno ... Paradisothe"), so
# the punctuation is restored here to give the model well-formed input.
source = """
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
circa 1308/21 by Dante. It is usually held to be one of the world's great works of literature.
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
"""

# ************************************************
# Create the DocumentScraperGraph instance and run it
# ************************************************
pdf_scraper_graph = DocumentScraperGraph(
    prompt="Summarize the text and find the main topics",
    source=source,
    config=graph_config,
)

result = pdf_scraper_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,120 @@
<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
<title>Oberon's Legacy</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-03-10</publish_date>
<description>In post-apocalypse England, the mysterious
agent known only as Oberon helps to create a new life
for the inhabitants of London. Sequel to Maeve
Ascendant.</description>
</book>
<book id="bk105">
<author>Corets, Eva</author>
<title>The Sundered Grail</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-09-10</publish_date>
<description>The two daughters of Maeve, half-sisters,
battle one another for control of England. Sequel to
Oberon's Legacy.</description>
</book>
<book id="bk106">
<author>Randall, Cynthia</author>
<title>Lover Birds</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-09-02</publish_date>
<description>When Carla meets Paul at an ornithology
conference, tempers fly as feathers get ruffled.</description>
</book>
<book id="bk107">
<author>Thurman, Paula</author>
<title>Splish Splash</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-11-02</publish_date>
<description>A deep sea diver finds true love twenty
thousand leagues beneath the sea.</description>
</book>
<book id="bk108">
<author>Knorr, Stefan</author>
<title>Creepy Crawlies</title>
<genre>Horror</genre>
<price>4.95</price>
<publish_date>2000-12-06</publish_date>
<description>An anthology of horror stories about roaches,
centipedes, scorpions and other insects.</description>
</book>
<book id="bk109">
<author>Kress, Peter</author>
<title>Paradox Lost</title>
<genre>Science Fiction</genre>
<price>6.95</price>
<publish_date>2000-11-02</publish_date>
<description>After an inadvertant trip through a Heisenberg
Uncertainty Device, James Salway discovers the problems
of being quantum.</description>
</book>
<book id="bk110">
<author>O'Brien, Tim</author>
<title>Microsoft .NET: The Programming Bible</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-09</publish_date>
<description>Microsoft's .NET initiative is explored in
detail in this deep programmer's reference.</description>
</book>
<book id="bk111">
<author>O'Brien, Tim</author>
<title>MSXML3: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-01</publish_date>
<description>The Microsoft MSXML3 parser is covered in
detail, with attention to XML DOM interfaces, XSLT processing,
SAX and more.</description>
</book>
<book id="bk112">
<author>Galos, Mike</author>
<title>Visual Studio 7: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>49.95</price>
<publish_date>2001-04-16</publish_date>
<description>Microsoft Visual Studio 7 is explored in depth,
looking at how Visual Basic, Visual C++, C#, and ASP+ are
integrated into a comprehensive development
environment.</description>
</book>
</catalog>

View File

@ -0,0 +1,38 @@
{
"quiz": {
"sport": {
"q1": {
"question": "Which one is correct team name in NBA?",
"options": [
"New York Bulls",
"Los Angeles Kings",
"Golden State Warriros",
"Huston Rocket"
],
"answer": "Huston Rocket"
}
},
"maths": {
"q1": {
"question": "5 + 7 = ?",
"options": [
"10",
"11",
"12",
"13"
],
"answer": "12"
},
"q2": {
"question": "12 - 8 = ?",
"options": [
"1",
"2",
"3",
"4"
],
"answer": "4"
}
}
}
}

View File

@ -0,0 +1,105 @@
<body class="fixed-top-nav " style="padding-top: 57px;">
<header>
<nav id="navbar" class="navbar navbar-light navbar-expand-sm fixed-top">
<div class="container">
<a class="navbar-brand title font-weight-lighter" href="/"><span class="font-weight-bold">Marco&nbsp;</span>Perini</a> <button class="navbar-toggler collapsed ml-auto" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar top-bar"></span> <span class="icon-bar middle-bar"></span> <span class="icon-bar bottom-bar"></span> </button>
<div class="collapse navbar-collapse text-right" id="navbarNav">
<ul class="navbar-nav ml-auto flex-nowrap">
<li class="nav-item "> <a class="nav-link" href="/">About</a> </li>
<li class="nav-item dropdown active">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Projects<span class="sr-only">(current)</span></a>
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdown">
<a class="dropdown-item" href="/projects/">Projects</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="/competitions/">Competitions</a>
</div>
</li>
<li class="nav-item "> <a class="nav-link" href="/cv/">CV</a> </li>
<li class="toggle-container"> <button id="light-toggle" title="Change theme"> <i class="fa-solid fa-moon"></i> <i class="fa-solid fa-sun"></i> </button> </li>
</ul>
</div>
</div>
</nav>
<progress id="progress" value="0" max="284" style="top: 57px;">
<div class="progress-container"> <span class="progress-bar"></span> </div>
</progress>
</header>
<div class="container mt-5">
<div class="post">
<header class="post-header">
<h1 class="post-title">Projects</h1>
<p class="post-description"></p>
</header>
<article>
<div class="projects">
<div class="grid" style="position: relative; height: 861.992px;">
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 0px; top: 0px;">
<a href="/projects/rotary-pendulum-rl/">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/rotary_pybullet.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Rotary Pendulum RL</h4>
<p class="card-text">Open Source project aimed at controlling a real life rotary pendulum using RL algorithms</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 260px; top: 0px;">
<a href="https://github.com/PeriniM/DQN-SwingUp" rel="external nofollow noopener" target="_blank">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/value-policy-heatmaps.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">DQN Implementation from scratch</h4>
<p class="card-text">Developed a Deep Q-Network algorithm to train a simple and double pendulum</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 0px; top: 447.414px;">
<a href="https://github.com/PeriniM/Multi-Agents-HAED" rel="external nofollow noopener" target="_blank">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/multi_agents_haed.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Multi Agents HAED</h4>
<p class="card-text">University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 260px; top: 370.172px;">
<a href="/projects/wireless-esc-drone/">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/wireless_esc.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Wireless ESC for Modular Drones</h4>
<p class="card-text">Modular drone architecture proposal and proof of concept. The project received maximum grade.</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
</div>
</div>
</article>
</div>
</div>
<footer class="fixed-bottom">
<div class="container mt-0"> © Copyright 2023 Marco Perini. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div>
</footer>
<div class="hiddendiv common"></div>
</body>

View File

@ -0,0 +1,6 @@
Username; Identifier;First name;Last name
booker12;9012;Rachel;Booker
grey07;2070;Laura;Grey
johnson81;4081;Craig;Johnson
jenkins46;9346;Mary;Jenkins
smith79;5079;Jamie;Smith
1 Username Identifier First name Last name
2 booker12 9012 Rachel Booker
3 grey07 2070 Laura Grey
4 johnson81 4081 Craig Johnson
5 jenkins46 9346 Mary Jenkins
6 smith79 5079 Jamie Smith

View File

@ -0,0 +1,57 @@
"""
Example: extract information from a local JSON document with JSONScraperGraph.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import JSONScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ---- Input: read the JSON document stored relative to this script ----
FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as handle:
    text = handle.read()

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the JSONScraperGraph and run it ----
json_scraper_graph = JSONScraperGraph(
    prompt="List me all questions and options in the math section, no answers.",
    source=text,  # the file content itself, not the file object
    config=graph_config,
)
result = json_scraper_graph.run()
print(json.dumps(result, indent=4))

# ---- Execution info ----
print(prettify_exec_info(json_scraper_graph.get_execution_info()))

# ---- Save to json or csv ----
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,32 @@
"""
Example showing how JSONScraperMultiGraph works on several JSON sources.
"""
import os
import json
from scrapegraphai.graphs import JSONScraperMultiGraph

# ---- Graph configuration ----
graph_config = {
    "llm": {
        "client": "client_name",
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0
    }
}

# ---- Input: read the JSON document stored relative to this script ----
FILE_NAME = "inputs/example.json"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

with open(file_path, 'r', encoding="utf-8") as file:
    text = file.read()

# The same document is supplied twice so the graph receives a list of sources.
sources = [text, text]

# ---- Build the JSONScraperMultiGraph and run it ----
multiple_search_graph = JSONScraperMultiGraph(
    # NOTE(review): inputs/example.json appears to be the quiz document (the
    # single-source JSON example queries it for quiz questions), so the prompt
    # asks about questions instead of books, which that file does not contain.
    prompt="List me all questions and options in the math section, no answers.",
    source=sources,
    schema=None,
    config=graph_config
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,44 @@
"""
Example: SmartScraperGraph configured with a custom rate limit.
"""
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ---- Graph configuration ----
# The rate limit is part of the llm settings: one request per second.
rate_limit = {"requests_per_second": 1}
graph_config = {
    "llm": {
        "client": "client_name",
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0,
        "rate_limit": rate_limit,
    },
}

# ---- Build the SmartScraperGraph and run it ----
PROMPT = "List me all the projects with their description"
# A string with the already downloaded HTML code is accepted as well.
SOURCE = "https://perinim.github.io/projects/"

smart_scraper_graph = SmartScraperGraph(prompt=PROMPT, source=SOURCE, config=graph_config)
result = smart_scraper_graph.run()
print(result)

# ---- Execution info ----
print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,54 @@
"""
Example: SmartScraperGraph fed with HTML text read from a local file.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ---- Input: read the text file stored relative to this script ----
# (the text could equally come from an HTTP request)
FILE_NAME = "inputs/plain_html_example.txt"
script_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as handle:
    text = handle.read()

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the SmartScraperGraph and run it ----
smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    source=text,
    config=graph_config,
)
result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

# ---- Execution info ----
print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

Binary file not shown.

After

Width:  |  Height:  |  Size: 80 KiB

View File

@ -0,0 +1,43 @@
"""
Example: generate a scraping script for a web page with ScriptCreatorGraph.
"""
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings, "library": "beautifulsoup"}

# ---- Build the ScriptCreatorGraph and run it ----
PROMPT = "List me all the projects with their description."
# A string with the already downloaded HTML code is accepted as well.
SOURCE = "https://perinim.github.io/projects"

script_creator_graph = ScriptCreatorGraph(prompt=PROMPT, source=SOURCE, config=graph_config)
result = script_creator_graph.run()
print(result)

# ---- Execution info ----
print(prettify_exec_info(script_creator_graph.get_execution_info()))

View File

@ -0,0 +1,48 @@
"""
Example: run ScriptCreatorMultiGraph over a list of URLs.
"""
from scrapegraphai.graphs import ScriptCreatorMultiGraph
from scrapegraphai.utils import prettify_exec_info

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings, "library": "beautifulsoup"}

# ---- Pages handed to the graph ----
urls = [
    "https://schultzbergagency.com/emil-raste-karlsen/",
    "https://schultzbergagency.com/johanna-hedberg/",
]

# ---- Build the ScriptCreatorMultiGraph and run it ----
script_creator_graph = ScriptCreatorMultiGraph(
    prompt="Find information about actors",
    source=urls,
    config=graph_config,
)
result = script_creator_graph.run()
print(result)

# ---- Execution info ----
print(prettify_exec_info(script_creator_graph.get_execution_info()))

View File

@ -0,0 +1,27 @@
"""
Example: answer a prompt with SearchGraph.
"""
from scrapegraphai.graphs import SearchGraph

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the SearchGraph and run it ----
PROMPT = "List me Chioggia's famous dishes"
search_graph = SearchGraph(prompt=PROMPT, config=graph_config)
result = search_graph.run()
print(result)

View File

@ -0,0 +1,54 @@
"""
Example: SearchGraph with a pydantic output schema.
"""
from typing import List
from pydantic import BaseModel, Field
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info


# ---- Output schema ----
# One dish: its name and its description.
class Dish(BaseModel):
    name: str = Field(description="The name of the dish")
    description: str = Field(description="The description of the dish")


# Top-level schema handed to the graph: a list of Dish entries.
class Dishes(BaseModel):
    dishes: List[Dish]


# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the SearchGraph and run it ----
PROMPT = "List me Chioggia's famous dishes"
search_graph = SearchGraph(prompt=PROMPT, config=graph_config, schema=Dishes)
result = search_graph.run()
print(result)

# ---- Execution info ----
print(prettify_exec_info(search_graph.get_execution_info()))

# ---- Save to json and csv ----
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,40 @@
"""
Example: SearchGraph whose result is also exported to CSV and JSON.
"""
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# ---- Graph configuration ----
graph_config = {
    "llm": {
        "client": "client_name",
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0
    }
}

# ---- Build the SearchGraph and run it ----
search_graph = SearchGraph(
    # Spelling fixed ("escursions" -> "excursions") so the query text sent by
    # the graph is not misspelled.
    prompt="List me the best excursions near Trento",
    config=graph_config
)

result = search_graph.run()
print(result)

# ---- Execution info ----
graph_exec_info = search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# ---- Save to json and csv ----
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,42 @@
"""
Example: scrape a web page with SmartScraperGraph.
"""
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the SmartScraperGraph and run it ----
PROMPT = "List me all the projects with their description"
# A string with the already downloaded HTML code is accepted as well.
SOURCE = "https://perinim.github.io/projects/"

smart_scraper_graph = SmartScraperGraph(prompt=PROMPT, source=SOURCE, config=graph_config)
result = smart_scraper_graph.run()
print(result)

# ---- Execution info ----
print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,26 @@
"""
Example: scrape a web page with SmartScraperLiteGraph.
"""
import json
from scrapegraphai.graphs import SmartScraperLiteGraph
from scrapegraphai.utils import prettify_exec_info

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the SmartScraperLiteGraph and run it ----
PROMPT = "Who is Marco Perini?"
SOURCE = "https://perinim.github.io/"

smart_scraper_lite_graph = SmartScraperLiteGraph(
    prompt=PROMPT,
    source=SOURCE,
    config=graph_config,
)
result = smart_scraper_lite_graph.run()
print(json.dumps(result, indent=4))

# ---- Execution info ----
print(prettify_exec_info(smart_scraper_lite_graph.get_execution_info()))

View File

@ -0,0 +1,34 @@
"""
Example: ask one question over several pages with SmartScraperMultiGraph.
"""
import json
from scrapegraphai.graphs import SmartScraperMultiGraph

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Pages handed to the graph ----
page_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

# ---- Build the SmartScraperMultiGraph and run it ----
multiple_search_graph = SmartScraperMultiGraph(
    prompt="Who is Marco Perini?",
    source=page_urls,
    schema=None,
    config=graph_config,
)
result = multiple_search_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,35 @@
"""
Example: ask one question over several pages with SmartScraperMultiConcatGraph.
"""
import json
from scrapegraphai.graphs import SmartScraperMultiConcatGraph

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Pages handed to the graph ----
page_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

# ---- Build the SmartScraperMultiConcatGraph and run it ----
multiple_search_graph = SmartScraperMultiConcatGraph(
    prompt="Who is Marco Perini?",
    source=page_urls,
    schema=None,
    config=graph_config,
)
result = multiple_search_graph.run()
print(json.dumps(result, indent=4))

View File

@ -0,0 +1,29 @@
"""
Example: ask one question over several pages with SmartScraperMultiLiteGraph.
"""
import json
from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Pages handed to the graph ----
page_urls = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/",
]

# ---- Build the SmartScraperMultiLiteGraph and run it ----
smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
    prompt="Who is Marco Perini?",
    source=page_urls,
    config=graph_config,
)
result = smart_scraper_multi_lite_graph.run()
print(json.dumps(result, indent=4))

# ---- Execution info ----
print(prettify_exec_info(smart_scraper_multi_lite_graph.get_execution_info()))

View File

@ -0,0 +1,52 @@
"""
Example: SmartScraperGraph with a pydantic output schema.
"""
from typing import List
from pydantic import BaseModel, Field
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info


# ---- Output schema ----
# One project: its title and its description.
class Project(BaseModel):
    title: str = Field(description="The title of the project")
    description: str = Field(description="The description of the project")


# Top-level schema handed to the graph: a list of Project entries.
class Projects(BaseModel):
    projects: List[Project]


# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the SmartScraperGraph and run it ----
PROMPT = "List me all the projects with their description"
# A string with the already downloaded HTML code is accepted as well.
SOURCE = "https://perinim.github.io/projects/"

smart_scraper_graph = SmartScraperGraph(
    prompt=PROMPT,
    source=SOURCE,
    schema=Projects,
    config=graph_config,
)
result = smart_scraper_graph.run()
print(result)

# ---- Execution info ----
print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,58 @@
"""
Example: extract information from a local XML document with XMLScraperGraph.
"""
import os
import json
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ---- Input: read the XML document stored relative to this script ----
FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as handle:
    text = handle.read()

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the XMLScraperGraph and run it ----
xml_scraper_graph = XMLScraperGraph(
    prompt="List me all the authors, title and genres of the books. Skip the preamble.",
    source=text,  # the file content itself, not the file object
    config=graph_config,
)
result = xml_scraper_graph.run()
print(json.dumps(result, indent=4))

# ---- Execution info ----
print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# ---- Save to json or csv ----
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,56 @@
"""
Example: query several XML documents at once with XMLScraperMultiGraph.
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ---- Input: read the XML document stored relative to this script ----
FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as handle:
    text = handle.read()

# ---- Graph configuration ----
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
graph_config = {"llm": llm_settings}

# ---- Build the XMLScraperMultiGraph and run it ----
# The same file content is supplied twice (content, not file objects).
xml_sources = [text, text]
xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=xml_sources,
    config=graph_config,
)
result = xml_scraper_graph.run()
print(result)

# ---- Execution info ----
print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# ---- Save to json or csv ----
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1 @@
OPENAI_APIKEY="your openai key here"

View File

@ -0,0 +1,43 @@
# Local models
# Local models
The two benchmark websites are:
- Example 1: https://perinim.github.io/projects
- Example 2: https://www.wired.com (at 17/4/2024)
Both are stored locally as .txt files, so that the benchmark does not depend on the internet connection
The time is measured in seconds
The model run for this benchmark is Mistral on Ollama with nomic-embed-text
| Hardware | Model | Example 1 | Example 2 |
| ---------------------- | --------------------------------------- | --------- | --------- |
| Macbook 14' m1 pro | Mistral on Ollama with nomic-embed-text | 30.54s | 35.76s |
| Macbook m2 max | Mistral on Ollama with nomic-embed-text | | |
| Macbook 14' m1 pro<br> | Llama3 on Ollama with nomic-embed-text | 27.82s | 29.986s |
| Macbook m2 max<br> | Llama3 on Ollama with nomic-embed-text | | |
**Note**: the examples on Docker were not run on devices other than the Macbook because the performance is too slow (10 times slower than Ollama).
# Performance on APIs services
### Example 1: personal portfolio
**URL**: https://perinim.github.io/projects
**Task**: List me all the projects with their description.
| Name | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
| --------------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
| gpt-3.5-turbo | 24.21 | 1892 | 1802 | 90 | 1 | 0.002883 |
| gpt-4-turbo-preview | 6.614 | 1936 | 1802 | 134 | 1 | 0.02204 |
| Groq with nomic-embed-text | 6.71 | 2201 | 2024 | 177 | 1 | 0 |
### Example 2: Wired
**URL**: https://www.wired.com
**Task**: List me all the articles with their description.
| Name | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
| --------------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
| gpt-3.5-turbo | | | | | | |
| gpt-4-turbo-preview | | | | | | |
| Groq with nomic-embed-text | | | | | | |

View File

@ -0,0 +1,61 @@
"""
Benchmark: run ScriptCreatorGraph on locally stored pages with a Groq LLM
and Ollama embeddings.
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ---- Benchmark inputs: one task per text file, paired by position ----
# NOTE: the paths are relative to the current working directory.
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = ["List me all the projects with their description.",
         "List me all the articles with their description."]

# ---- Graph configuration ----
groq_key = os.getenv("GROQ_APIKEY")

graph_config = {
    "llm": {
        "model": "groq/gemma-7b-it",
        "api_key": groq_key,
        "temperature": 0
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
        "temperature": 0,
        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
    },
    "headless": False,
    # Spelling fixed (was "beautifoulsoup") to match the other examples.
    "library": "beautifulsoup"
}

# ---- Build a ScriptCreatorGraph per (file, task) pair and run it ----
for file_name, task in zip(files, tasks):
    with open(file_name, 'r', encoding="utf-8") as file:
        text = file.read()

    script_creator_graph = ScriptCreatorGraph(
        prompt=task,
        source=text,
        config=graph_config
    )

    result = script_creator_graph.run()
    print(result)

    # Execution info for this run
    graph_exec_info = script_creator_graph.get_execution_info()
    print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,57 @@
"""
Benchmark: run ScriptCreatorGraph on locally stored pages with Llama3 on
Ollama and Ollama embeddings.
"""
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info

# ---- Benchmark inputs: one task per text file, paired by position ----
# NOTE: the paths are relative to the current working directory.
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = ["List me all the projects with their description.",
         "List me all the articles with their description."]

# ---- Graph configuration ----
graph_config = {
    "llm": {
        "model": "ollama/llama3",
        "temperature": 0,
        # "model_tokens": 2000, # set context length arbitrarily,
        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
        "temperature": 0,
        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
    },
    # Spelling fixed (was "beautifoulsoup") to match the other examples.
    "library": "beautifulsoup"
}

# ---- Build a ScriptCreatorGraph per (file, task) pair and run it ----
for file_name, task in zip(files, tasks):
    with open(file_name, 'r', encoding="utf-8") as file:
        text = file.read()

    script_creator_graph = ScriptCreatorGraph(
        prompt=task,
        source=text,
        config=graph_config
    )

    result = script_creator_graph.run()
    print(result)

    # Execution info for this run
    graph_exec_info = script_creator_graph.get_execution_info()
    print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,62 @@
"""
Basic example of scraping pipeline using SmartScraper from text
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ************************************************
# Input files and the task paired with each of them (same position)
# ************************************************
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = ["List me all the projects with their description.",
         "List me all the articles with their description."]

# ************************************************
# Define the configuration for the graph
# ************************************************
# Both models below are reached through the local Ollama URL, so no API key
# is read from the environment here.
graph_config = {
    "llm": {
        "model": "ollama/mistral",
        "temperature": 0,
        # "model_tokens": 2000, # set context length arbitrarily,
        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
    },
    "embeddings": {
        "model": "ollama/nomic-embed-text",
        "temperature": 0,
        "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
    },
    # NOTE(review): "beautifoulsoup" looks like a misspelling of
    # "beautifulsoup"; kept as-is -- confirm how the graph consumes this value.
    "library": "beautifoulsoup"
}

# ************************************************
# Create a ScriptCreatorGraph instance for each input and run it
# ************************************************
# zip() pairs every file with its task, so the number of inputs is no longer
# hard-coded in the loop bounds.
for path, task in zip(files, tasks):
    with open(path, "r", encoding="utf-8") as file:
        text = file.read()

    script_creator_graph = ScriptCreatorGraph(
        prompt=task,
        source=text,
        config=graph_config
    )

    result = script_creator_graph.run()
    print(result)

    # ************************************************
    # Get graph execution info for the run that just finished
    # ************************************************
    graph_exec_info = script_creator_graph.get_execution_info()
    print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,53 @@
"""
Basic example of a script-generation pipeline using ScriptCreatorGraph from text
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ************************************************
# Input files and the task paired with each of them (same position)
# ************************************************
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = ["List me all the projects with their description.",
         "List me all the articles with their description."]

# ************************************************
# Define the configuration for the graph
# ************************************************
# os.getenv returns None when OPENAI_APIKEY is not set in the environment.
openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "openai/gpt-3.5-turbo",
    },
    # NOTE(review): "beautifoulsoup" looks like a misspelling of
    # "beautifulsoup"; kept as-is -- confirm how the graph consumes this value.
    "library": "beautifoulsoup"
}

# ************************************************
# Create a ScriptCreatorGraph instance for each input and run it
# ************************************************
# zip() pairs every file with its task, so the number of inputs is no longer
# hard-coded in the loop bounds.
for path, task in zip(files, tasks):
    with open(path, "r", encoding="utf-8") as file:
        text = file.read()

    script_creator_graph = ScriptCreatorGraph(
        prompt=task,
        source=text,
        config=graph_config
    )

    result = script_creator_graph.run()
    print(result)

    # ************************************************
    # Get graph execution info for the run that just finished
    # ************************************************
    graph_exec_info = script_creator_graph.get_execution_info()
    print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,53 @@
"""
Basic example of a script-generation pipeline using ScriptCreatorGraph from text
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ************************************************
# Input files and the task paired with each of them (same position)
# ************************************************
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = ["List me all the projects with their description.",
         "List me all the articles with their description."]

# ************************************************
# Define the configuration for the graph
# ************************************************
# os.getenv returns None when OPENAI_APIKEY is not set in the environment.
openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "openai/gpt-4-turbo-2024-04-09",
    },
    # NOTE(review): "beautifoulsoup" looks like a misspelling of
    # "beautifulsoup"; kept as-is -- confirm how the graph consumes this value.
    "library": "beautifoulsoup"
}

# ************************************************
# Create a ScriptCreatorGraph instance for each input and run it
# ************************************************
# zip() pairs every file with its task, so the number of inputs is no longer
# hard-coded in the loop bounds.
for path, task in zip(files, tasks):
    with open(path, "r", encoding="utf-8") as file:
        text = file.read()

    script_creator_graph = ScriptCreatorGraph(
        prompt=task,
        source=text,
        config=graph_config
    )

    result = script_creator_graph.run()
    print(result)

    # ************************************************
    # Get graph execution info for the run that just finished
    # ************************************************
    graph_exec_info = script_creator_graph.get_execution_info()
    print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,105 @@
<body class="fixed-top-nav " style="padding-top: 57px;">
<header>
<nav id="navbar" class="navbar navbar-light navbar-expand-sm fixed-top">
<div class="container">
<a class="navbar-brand title font-weight-lighter" href="/"><span class="font-weight-bold">Marco&nbsp;</span>Perini</a> <button class="navbar-toggler collapsed ml-auto" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar top-bar"></span> <span class="icon-bar middle-bar"></span> <span class="icon-bar bottom-bar"></span> </button>
<div class="collapse navbar-collapse text-right" id="navbarNav">
<ul class="navbar-nav ml-auto flex-nowrap">
<li class="nav-item "> <a class="nav-link" href="/">About</a> </li>
<li class="nav-item dropdown active">
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Projects<span class="sr-only">(current)</span></a>
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdown">
<a class="dropdown-item" href="/projects/">Projects</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="/competitions/">Competitions</a>
</div>
</li>
<li class="nav-item "> <a class="nav-link" href="/cv/">CV</a> </li>
<li class="toggle-container"> <button id="light-toggle" title="Change theme"> <i class="fa-solid fa-moon"></i> <i class="fa-solid fa-sun"></i> </button> </li>
</ul>
</div>
</div>
</nav>
<progress id="progress" value="0" max="284" style="top: 57px;">
<div class="progress-container"> <span class="progress-bar"></span> </div>
</progress>
</header>
<div class="container mt-5">
<div class="post">
<header class="post-header">
<h1 class="post-title">Projects</h1>
<p class="post-description"></p>
</header>
<article>
<div class="projects">
<div class="grid" style="position: relative; height: 861.992px;">
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 0px; top: 0px;">
<a href="/projects/rotary-pendulum-rl/">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/rotary_pybullet.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Rotary Pendulum RL</h4>
<p class="card-text">Open Source project aimed at controlling a real life rotary pendulum using RL algorithms</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 260px; top: 0px;">
<a href="https://github.com/PeriniM/DQN-SwingUp" rel="external nofollow noopener" target="_blank">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/value-policy-heatmaps.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">DQN Implementation from scratch</h4>
<p class="card-text">Developed a Deep Q-Network algorithm to train a simple and double pendulum</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 0px; top: 447.414px;">
<a href="https://github.com/PeriniM/Multi-Agents-HAED" rel="external nofollow noopener" target="_blank">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/multi_agents_haed.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Multi Agents HAED</h4>
<p class="card-text">University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
<div class="grid-sizer"></div>
<div class="grid-item" style="position: absolute; left: 260px; top: 370.172px;">
<a href="/projects/wireless-esc-drone/">
<div class="card hoverable">
<figure>
<picture> <img src="/assets/img/wireless_esc.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
</figure>
<div class="card-body">
<h4 class="card-title">Wireless ESC for Modular Drones</h4>
<p class="card-text">Modular drone architecture proposal and proof of concept. The project received maximum grade.</p>
<div class="row ml-1 mr-1 p-0"> </div>
</div>
</div>
</a>
</div>
</div>
</div>
</article>
</div>
</div>
<footer class="fixed-bottom">
<div class="container mt-0"> © Copyright 2023 Marco Perini. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div>
</footer>
<div class="hiddendiv common"></div>
</body>

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
OPENAI_APIKEY="your openai key here"

View File

@ -0,0 +1,42 @@
# Local models
The two benchmark websites are:
- Example 1: https://perinim.github.io/projects
- Example 2: https://www.wired.com (at 17/4/2024)
Both are stored locally as .txt files, so the benchmarks do not depend on the internet connection.
| Hardware | Model | Example 1 | Example 2 |
| ---------------------- | --------------------------------------- | --------- | --------- |
| Macbook 14' m1 pro | Mistral on Ollama with nomic-embed-text | 16.291s | 38.74s |
| Macbook m2 max | Mistral on Ollama with nomic-embed-text | | |
| Macbook 14' m1 pro<br> | Llama3 on Ollama with nomic-embed-text | 12.88s | 13.84s |
| Macbook m2 max<br> | Llama3 on Ollama with nomic-embed-text | | |
**Note**: the Docker examples were not run on devices other than the MacBook because the performance is too slow (10 times slower than Ollama). The results are the following:
| Hardware | Example 1 | Example 2 |
| ------------------ | --------- | --------- |
| Macbook 14' m1 pro | 139.89 | Too long |
# Performance on API services
### Example 1: personal portfolio
**URL**: https://perinim.github.io/projects
**Task**: List me all the projects with their description.
| Name | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
| ------------------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
| gpt-3.5-turbo | 4.132s | 438 | 303 | 135 | 1 | 0.000724 |
| gpt-4-turbo-preview | 6.965s | 442 | 303 | 139 | 1 | 0.0072 |
| gpt-4-o | 4.446s | 444 | 305 | 139 | 1 | 0 |
| Groq with nomic-embed-text<br> | 1.335s | 648 | 482 | 166 | 1 | 0 |
### Example 2: Wired
**URL**: https://www.wired.com
**Task**: List me all the articles with their description.
| Name | Execution time (seconds) | total_tokens | prompt_tokens | completion_tokens | successful_requests | total_cost_USD |
| ------------------------------- | ------------------------ | ------------ | ------------- | ----------------- | ------------------- | -------------- |
| gpt-3.5-turbo | 8.836s | 1167 | 726 | 441 | 1 | 0.001971 |
| gpt-4-turbo-preview | 21.53s | 1205 | 726 | 479 | 1 | 0.02163 |
| gpt-4-o | 15.27s | 1400 | 715 | 685 | 1 | 0 |
| Groq with nomic-embed-text<br> | 3.82s | 2459 | 2192 | 267 | 1 | 0 |

View File

@ -0,0 +1,51 @@
"""
Basic example of scraping pipeline using SmartScraper from text
"""
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = [
    "List me all the projects with their description.",
    "List me all the articles with their description.",
]

# ------------------------------------------------
# Graph configuration: "ollama/mistral" as LLM, "ollama/nomic-embed-text"
# for the embeddings
# ------------------------------------------------
llm_settings = {
    "model": "ollama/mistral",
    "temperature": 0,
    "format": "json",  # Ollama needs the format to be specified explicitly
    # "model_tokens": 2000,  # set context length arbitrarily
}
embedding_settings = {
    "model": "ollama/nomic-embed-text",
    "temperature": 0,
}
graph_config = {"llm": llm_settings, "embeddings": embedding_settings}

# ------------------------------------------------
# Run one SmartScraperGraph per (input file, task) pair
# ------------------------------------------------
for input_path, task in zip(files, tasks):
    with open(input_path, "r", encoding="utf-8") as handle:
        page_text = handle.read()

    scraper = SmartScraperGraph(
        prompt=task,
        source=page_text,
        config=graph_config,
    )
    print(scraper.run())

    # Execution info of the run that just finished
    print(prettify_exec_info(scraper.get_execution_info()))

View File

@ -0,0 +1,57 @@
"""
Basic example of scraping pipeline using SmartScraper from text
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = [
    "List me all the projects with their description.",
    "List me all the articles with their description.",
]

# ------------------------------------------------
# Graph configuration: "groq/gemma-7b-it" as LLM (key read from GROQ_APIKEY),
# "ollama/nomic-embed-text" for the embeddings
# ------------------------------------------------
groq_key = os.getenv("GROQ_APIKEY")

llm_settings = {
    "model": "groq/gemma-7b-it",
    "api_key": groq_key,
    "temperature": 0,
}
embedding_settings = {
    "model": "ollama/nomic-embed-text",
    "temperature": 0,
    "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
}
graph_config = {
    "llm": llm_settings,
    "embeddings": embedding_settings,
    "headless": False,
}

# ------------------------------------------------
# Run one SmartScraperGraph per (input file, task) pair
# ------------------------------------------------
for input_path, task in zip(files, tasks):
    with open(input_path, "r", encoding="utf-8") as handle:
        page_text = handle.read()

    scraper = SmartScraperGraph(
        prompt=task,
        source=page_text,
        config=graph_config,
    )
    print(scraper.run())

    # Execution info of the run that just finished
    print(prettify_exec_info(scraper.get_execution_info()))

View File

@ -0,0 +1,53 @@
"""
Basic example of scraping pipeline using SmartScraper from text
"""
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = [
    "List me all the projects with their description.",
    "List me all the articles with their description.",
]

# ------------------------------------------------
# Graph configuration: "ollama/llama3" as LLM, "ollama/nomic-embed-text"
# for the embeddings, both pointed at the same base URL
# ------------------------------------------------
llm_settings = {
    "model": "ollama/llama3",
    "temperature": 0,
    "format": "json",  # Ollama needs the format to be specified explicitly
    # "model_tokens": 2000,  # set context length arbitrarily
    "base_url": "http://localhost:11434",
}
embedding_settings = {
    "model": "ollama/nomic-embed-text",
    "temperature": 0,
    "base_url": "http://localhost:11434",
}
graph_config = {"llm": llm_settings, "embeddings": embedding_settings}

# ------------------------------------------------
# Run one SmartScraperGraph per (input file, task) pair
# ------------------------------------------------
for input_path, task in zip(files, tasks):
    with open(input_path, "r", encoding="utf-8") as handle:
        page_text = handle.read()

    scraper = SmartScraperGraph(
        prompt=task,
        source=page_text,
        config=graph_config,
    )
    print(scraper.run())

    # Execution info of the run that just finished
    print(prettify_exec_info(scraper.get_execution_info()))

View File

@ -0,0 +1,54 @@
"""
Basic example of scraping pipeline using SmartScraper from text
"""
import os
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
files = ["inputs/example_1.txt", "inputs/example_2.txt"]
tasks = [
    "List me all the projects with their description.",
    "List me all the articles with their description.",
]

# ------------------------------------------------
# Graph configuration: "ollama/mistral" as LLM, "ollama/nomic-embed-text"
# for the embeddings, both pointed at the same base URL
# ------------------------------------------------
llm_settings = {
    "model": "ollama/mistral",
    "temperature": 0,
    "format": "json",  # Ollama needs the format to be specified explicitly
    # "model_tokens": 2000,  # set context length arbitrarily
    "base_url": "http://localhost:11434",
}
embedding_settings = {
    "model": "ollama/nomic-embed-text",
    "temperature": 0,
    "base_url": "http://localhost:11434",
}
graph_config = {"llm": llm_settings, "embeddings": embedding_settings}

# ------------------------------------------------
# Run one SmartScraperGraph per (input file, task) pair
# ------------------------------------------------
for input_path, task in zip(files, tasks):
    with open(input_path, "r", encoding="utf-8") as handle:
        page_text = handle.read()

    scraper = SmartScraperGraph(
        prompt=task,
        source=page_text,
        config=graph_config,
    )
    print(scraper.run())

    # Execution info of the run that just finished
    print(prettify_exec_info(scraper.get_execution_info()))

Some files were not shown because too many files have changed in this diff Show More