mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-01 21:00:48 +08:00
chore(mistral): create examples
This commit is contained in:
parent
17f2707313
commit
f8ad616e10
1
examples/mistral/.env.example
Normal file
1
examples/mistral/.env.example
Normal file
@ -0,0 +1 @@
|
||||
MISTRAL_API_KEY="YOUR MISTRAL API KEY"
|
||||
56
examples/mistral/csv_scraper_graph_multi_mistral.py
Normal file
56
examples/mistral/csv_scraper_graph_multi_mistral.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
# ************************************************
|
||||
# Read the CSV file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/username.csv"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
text = pd.read_csv(file_path)
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the CSVScraperMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
csv_scraper_graph = CSVScraperMultiGraph(
|
||||
prompt="List me all the last names",
|
||||
source=[str(text), str(text)],
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = csv_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = csv_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
57
examples/mistral/csv_scraper_mistral.py
Normal file
57
examples/mistral/csv_scraper_mistral.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the CSV file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/username.csv"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
text = pd.read_csv(file_path)
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the CSVScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
csv_scraper_graph = CSVScraperGraph(
|
||||
prompt="List me all the last names",
|
||||
source=str(text), # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = csv_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = csv_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
110
examples/mistral/custom_graph_mistral.py
Normal file
110
examples/mistral/custom_graph_mistral.py
Normal file
@ -0,0 +1,110 @@
|
||||
"""
|
||||
Example of custom graph using existing nodes
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from scrapegraphai.models import OpenAI
|
||||
from scrapegraphai.graphs import BaseGraph
|
||||
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Define the graph nodes
|
||||
# ************************************************
|
||||
|
||||
llm_model = OpenAI(graph_config["llm"])
|
||||
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
|
||||
|
||||
# define the nodes for the graph
|
||||
robot_node = RobotsNode(
|
||||
input="url",
|
||||
output=["is_scrapable"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"force_scraping": True,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
fetch_node = FetchNode(
|
||||
input="url | local_dir",
|
||||
output=["doc", "link_urls", "img_urls"],
|
||||
node_config={
|
||||
"verbose": True,
|
||||
"headless": True,
|
||||
}
|
||||
)
|
||||
parse_node = ParseNode(
|
||||
input="doc",
|
||||
output=["parsed_doc"],
|
||||
node_config={
|
||||
"chunk_size": 4096,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
rag_node = RAGNode(
|
||||
input="user_prompt & (parsed_doc | doc)",
|
||||
output=["relevant_chunks"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"embedder_model": embedder,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
generate_answer_node = GenerateAnswerNode(
|
||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||
output=["answer"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Create the graph by defining the connections
|
||||
# ************************************************
|
||||
|
||||
graph = BaseGraph(
|
||||
nodes=[
|
||||
robot_node,
|
||||
fetch_node,
|
||||
parse_node,
|
||||
rag_node,
|
||||
generate_answer_node,
|
||||
],
|
||||
edges=[
|
||||
(robot_node, fetch_node),
|
||||
(fetch_node, parse_node),
|
||||
(parse_node, rag_node),
|
||||
(rag_node, generate_answer_node)
|
||||
],
|
||||
entry_point=robot_node
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Execute the graph
|
||||
# ************************************************
|
||||
|
||||
result, execution_info = graph.execute({
|
||||
"user_prompt": "Describe the content",
|
||||
"url": "https://example.com/"
|
||||
})
|
||||
|
||||
# get the answer from the result
|
||||
result = result.get("answer", "No answer found.")
|
||||
print(result)
|
||||
47
examples/mistral/deep_scraper_mistral.py
Normal file
47
examples/mistral/deep_scraper_mistral.py
Normal file
@ -0,0 +1,47 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using DeepScraperGraph
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import DeepScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose": True,
|
||||
"max_depth": 1
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the DeepScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
deep_scraper_graph = DeepScraperGraph(
|
||||
prompt="List me all the job titles and detailed job description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = deep_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = deep_scraper_graph.get_execution_info()
|
||||
print(deep_scraper_graph.get_state("relevant_links"))
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
120
examples/mistral/inputs/books.xml
Normal file
120
examples/mistral/inputs/books.xml
Normal file
@ -0,0 +1,120 @@
|
||||
<?xml version="1.0"?>
|
||||
<catalog>
|
||||
<book id="bk101">
|
||||
<author>Gambardella, Matthew</author>
|
||||
<title>XML Developer's Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>44.95</price>
|
||||
<publish_date>2000-10-01</publish_date>
|
||||
<description>An in-depth look at creating applications
|
||||
with XML.</description>
|
||||
</book>
|
||||
<book id="bk102">
|
||||
<author>Ralls, Kim</author>
|
||||
<title>Midnight Rain</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-12-16</publish_date>
|
||||
<description>A former architect battles corporate zombies,
|
||||
an evil sorceress, and her own childhood to become queen
|
||||
of the world.</description>
|
||||
</book>
|
||||
<book id="bk103">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Maeve Ascendant</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-11-17</publish_date>
|
||||
<description>After the collapse of a nanotechnology
|
||||
society in England, the young survivors lay the
|
||||
foundation for a new society.</description>
|
||||
</book>
|
||||
<book id="bk104">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Oberon's Legacy</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-03-10</publish_date>
|
||||
<description>In post-apocalypse England, the mysterious
|
||||
agent known only as Oberon helps to create a new life
|
||||
for the inhabitants of London. Sequel to Maeve
|
||||
Ascendant.</description>
|
||||
</book>
|
||||
<book id="bk105">
|
||||
<author>Corets, Eva</author>
|
||||
<title>The Sundered Grail</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-09-10</publish_date>
|
||||
<description>The two daughters of Maeve, half-sisters,
|
||||
battle one another for control of England. Sequel to
|
||||
Oberon's Legacy.</description>
|
||||
</book>
|
||||
<book id="bk106">
|
||||
<author>Randall, Cynthia</author>
|
||||
<title>Lover Birds</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-09-02</publish_date>
|
||||
<description>When Carla meets Paul at an ornithology
|
||||
conference, tempers fly as feathers get ruffled.</description>
|
||||
</book>
|
||||
<book id="bk107">
|
||||
<author>Thurman, Paula</author>
|
||||
<title>Splish Splash</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>A deep sea diver finds true love twenty
|
||||
thousand leagues beneath the sea.</description>
|
||||
</book>
|
||||
<book id="bk108">
|
||||
<author>Knorr, Stefan</author>
|
||||
<title>Creepy Crawlies</title>
|
||||
<genre>Horror</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-12-06</publish_date>
|
||||
<description>An anthology of horror stories about roaches,
|
||||
centipedes, scorpions and other insects.</description>
|
||||
</book>
|
||||
<book id="bk109">
|
||||
<author>Kress, Peter</author>
|
||||
<title>Paradox Lost</title>
|
||||
<genre>Science Fiction</genre>
|
||||
<price>6.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>After an inadvertant trip through a Heisenberg
|
||||
Uncertainty Device, James Salway discovers the problems
|
||||
of being quantum.</description>
|
||||
</book>
|
||||
<book id="bk110">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>Microsoft .NET: The Programming Bible</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-09</publish_date>
|
||||
<description>Microsoft's .NET initiative is explored in
|
||||
detail in this deep programmer's reference.</description>
|
||||
</book>
|
||||
<book id="bk111">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>MSXML3: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-01</publish_date>
|
||||
<description>The Microsoft MSXML3 parser is covered in
|
||||
detail, with attention to XML DOM interfaces, XSLT processing,
|
||||
SAX and more.</description>
|
||||
</book>
|
||||
<book id="bk112">
|
||||
<author>Galos, Mike</author>
|
||||
<title>Visual Studio 7: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>49.95</price>
|
||||
<publish_date>2001-04-16</publish_date>
|
||||
<description>Microsoft Visual Studio 7 is explored in depth,
|
||||
looking at how Visual Basic, Visual C++, C#, and ASP+ are
|
||||
integrated into a comprehensive development
|
||||
environment.</description>
|
||||
</book>
|
||||
</catalog>
|
||||
182
examples/mistral/inputs/example.json
Normal file
182
examples/mistral/inputs/example.json
Normal file
@ -0,0 +1,182 @@
|
||||
{
|
||||
"kind":"youtube#searchListResponse",
|
||||
"etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg",
|
||||
"nextPageToken":"CAUQAA",
|
||||
"regionCode":"NL",
|
||||
"pageInfo":{
|
||||
"totalResults":1000000,
|
||||
"resultsPerPage":5
|
||||
},
|
||||
"items":[
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"TvWDY4Mm5GM"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T14:15:01Z",
|
||||
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
|
||||
"title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts",
|
||||
"description":"",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"FC Motivate",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T14:15:01Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"aZM_42CcNZ4"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T16:09:27Z",
|
||||
"channelId":"UCM5gMM_HqfKHYIEJ3lstMUA",
|
||||
"title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 💰",
|
||||
"description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"John Nellis",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T16:09:27Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"WbBz4oh9I5VaYj91LjeJvffrBVY",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"wkP3XS3aNAY"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T16:00:50Z",
|
||||
"channelId":"UC4EP1dxFDPup_aFLt0ElsDw",
|
||||
"title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL",
|
||||
"description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"Shoot for Love",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T16:00:50Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"juxv_FhT_l4qrR05S1QTrb4CGh8",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"rJkDZ0WvfT8"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-24T10:00:39Z",
|
||||
"channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ",
|
||||
"title":"TOP 10 DEFENDERS 2023",
|
||||
"description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! • Instagram ...",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"Home of Football",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-24T10:00:39Z"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind":"youtube#searchResult",
|
||||
"etag":"wtuknXTmI1txoULeH3aWaOuXOow",
|
||||
"id":{
|
||||
"kind":"youtube#video",
|
||||
"videoId":"XH0rtu4U6SE"
|
||||
},
|
||||
"snippet":{
|
||||
"publishedAt":"2023-07-21T16:30:05Z",
|
||||
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
|
||||
"title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts",
|
||||
"description":"",
|
||||
"thumbnails":{
|
||||
"default":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg",
|
||||
"width":120,
|
||||
"height":90
|
||||
},
|
||||
"medium":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg",
|
||||
"width":320,
|
||||
"height":180
|
||||
},
|
||||
"high":{
|
||||
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg",
|
||||
"width":480,
|
||||
"height":360
|
||||
}
|
||||
},
|
||||
"channelTitle":"FC Motivate",
|
||||
"liveBroadcastContent":"none",
|
||||
"publishTime":"2023-07-21T16:30:05Z"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
35
examples/mistral/inputs/markdown_example.md
Normal file
35
examples/mistral/inputs/markdown_example.md
Normal file
@ -0,0 +1,35 @@
|
||||
Marco Perini Toggle navigation
|
||||
|
||||
* About
|
||||
* Projects(current)
|
||||
|
||||
Projects
|
||||
|
||||
Competitions
|
||||
|
||||
* CV
|
||||
* ____
|
||||
|
||||
# Projects
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
© Copyright 2023 Marco Perini. Powered by Jekyll with
|
||||
al-folio theme. Hosted by [GitHub
|
||||
Pages](https://pages.github.com/).
|
||||
105
examples/mistral/inputs/plain_html_example.txt
Normal file
105
examples/mistral/inputs/plain_html_example.txt
Normal file
@ -0,0 +1,105 @@
|
||||
<body class="fixed-top-nav " style="padding-top: 57px;">
|
||||
<header>
|
||||
<nav id="navbar" class="navbar navbar-light navbar-expand-sm fixed-top">
|
||||
<div class="container">
|
||||
<a class="navbar-brand title font-weight-lighter" href="/"><span class="font-weight-bold">Marco </span>Perini</a> <button class="navbar-toggler collapsed ml-auto" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar top-bar"></span> <span class="icon-bar middle-bar"></span> <span class="icon-bar bottom-bar"></span> </button>
|
||||
<div class="collapse navbar-collapse text-right" id="navbarNav">
|
||||
<ul class="navbar-nav ml-auto flex-nowrap">
|
||||
<li class="nav-item "> <a class="nav-link" href="/">About</a> </li>
|
||||
<li class="nav-item dropdown active">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Projects<span class="sr-only">(current)</span></a>
|
||||
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="/projects/">Projects</a>
|
||||
<div class="dropdown-divider"></div>
|
||||
<a class="dropdown-item" href="/competitions/">Competitions</a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="nav-item "> <a class="nav-link" href="/cv/">CV</a> </li>
|
||||
<li class="toggle-container"> <button id="light-toggle" title="Change theme"> <i class="fa-solid fa-moon"></i> <i class="fa-solid fa-sun"></i> </button> </li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<progress id="progress" value="0" max="284" style="top: 57px;">
|
||||
<div class="progress-container"> <span class="progress-bar"></span> </div>
|
||||
</progress>
|
||||
</header>
|
||||
<div class="container mt-5">
|
||||
<div class="post">
|
||||
<header class="post-header">
|
||||
<h1 class="post-title">Projects</h1>
|
||||
<p class="post-description"></p>
|
||||
</header>
|
||||
<article>
|
||||
<div class="projects">
|
||||
<div class="grid" style="position: relative; height: 861.992px;">
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 0px;">
|
||||
<a href="/projects/rotary-pendulum-rl/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/rotary_pybullet.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Rotary Pendulum RL</h4>
|
||||
<p class="card-text">Open Source project aimed at controlling a real life rotary pendulum using RL algorithms</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 0px;">
|
||||
<a href="https://github.com/PeriniM/DQN-SwingUp" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/value-policy-heatmaps.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">DQN Implementation from scratch</h4>
|
||||
<p class="card-text">Developed a Deep Q-Network algorithm to train a simple and double pendulum</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 447.414px;">
|
||||
<a href="https://github.com/PeriniM/Multi-Agents-HAED" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/multi_agents_haed.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Multi Agents HAED</h4>
|
||||
<p class="card-text">University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 370.172px;">
|
||||
<a href="/projects/wireless-esc-drone/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/wireless_esc.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Wireless ESC for Modular Drones</h4>
|
||||
<p class="card-text">Modular drone architecture proposal and proof of concept. The project received maximum grade.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
<footer class="fixed-bottom">
|
||||
<div class="container mt-0"> © Copyright 2023 Marco Perini. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div>
|
||||
</footer>
|
||||
<div class="hiddendiv common"></div>
|
||||
</body>
|
||||
7
examples/mistral/inputs/username.csv
Normal file
7
examples/mistral/inputs/username.csv
Normal file
@ -0,0 +1,7 @@
|
||||
Username; Identifier;First name;Last name
|
||||
booker12;9012;Rachel;Booker
|
||||
grey07;2070;Laura;Grey
|
||||
johnson81;4081;Craig;Johnson
|
||||
jenkins46;9346;Mary;Jenkins
|
||||
smith79;5079;Jamie;Smith
|
||||
|
||||
|
58
examples/mistral/json_scraper_mistral.py
Normal file
58
examples/mistral/json_scraper_mistral.py
Normal file
@ -0,0 +1,58 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using JSONScraperGraph from JSON documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the JSON file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the JSONScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
json_scraper_graph = JSONScraperGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=text, # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = json_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = json_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
|
||||
37
examples/mistral/json_scraper_multi_mistral.py
Normal file
37
examples/mistral/json_scraper_multi_mistral.py
Normal file
@ -0,0 +1,37 @@
|
||||
"""
|
||||
Module for showing how JSONScraperMultiGraph works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
}
|
||||
}
|
||||
|
||||
FILE_NAME = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
sources = [text, text]
|
||||
|
||||
multiple_search_graph = JSONScraperMultiGraph(
|
||||
prompt= "List me all the authors, title and genres of the books",
|
||||
source= sources,
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
57
examples/mistral/md_scraper_mistral.py
Normal file
57
examples/mistral/md_scraper_mistral.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using MDScraperGraph from MD documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import MDScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the MD file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/markdown_example.md"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the MDScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
md_scraper_graph = MDScraperGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=text, # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = md_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = md_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
40
examples/mistral/pdf_scraper_mistral.py
Normal file
40
examples/mistral/pdf_scraper_mistral.py
Normal file
@ -0,0 +1,40 @@
|
||||
import os, json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import PDFScraperGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
source = """
|
||||
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
|
||||
circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature.
|
||||
Divided into three major sections—Inferno, Purgatorio, and Paradiso—the narrative traces the journey of Dante
|
||||
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
|
||||
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
|
||||
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
|
||||
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
|
||||
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
|
||||
"""
|
||||
|
||||
pdf_scraper_graph = PDFScraperGraph(
|
||||
prompt="Summarize the text and find the main topics",
|
||||
source=source,
|
||||
config=graph_config,
|
||||
)
|
||||
result = pdf_scraper_graph.run()
|
||||
|
||||
print(json.dumps(result, indent=4))
|
||||
64
examples/mistral/pdf_scraper_multi_mistral.py
Normal file
64
examples/mistral/pdf_scraper_multi_mistral.py
Normal file
@ -0,0 +1,64 @@
|
||||
"""
|
||||
Example showing how to extract structured data from multiple documents with PdfScraperMultiGraph
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import PdfScraperMultiGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Article(BaseModel):
|
||||
independent_variable: str = Field(description="(IV): The variable that is manipulated or considered as the primary cause affecting other variables.")
|
||||
dependent_variable: str = Field(description="(DV) The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.")
|
||||
exogenous_shock: str = Field(description="Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.")
|
||||
|
||||
class Articles(BaseModel):
|
||||
articles: List[Article]
|
||||
|
||||
# ************************************************
|
||||
# Define the sources for the graph
|
||||
# ************************************************
|
||||
|
||||
sources = [
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons."
|
||||
]
|
||||
|
||||
prompt = """
|
||||
Analyze the abstracts provided from an academic journal article to extract and clearly identify the Independent Variable (IV), Dependent Variable (DV), and Exogenous Shock.
|
||||
"""
|
||||
|
||||
# *******************************************************
|
||||
# Create the PdfScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = PdfScraperMultiGraph(
|
||||
prompt=prompt,
|
||||
source= sources,
|
||||
schema=Articles,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
1
examples/mistral/readme.md
Normal file
1
examples/mistral/readme.md
Normal file
@ -0,0 +1 @@
|
||||
This folder contains examples of how to use ScrapeGraph-AI with Mistral, an LLM provider. The examples show how to extract information from websites and from local documents (CSV, Markdown, PDF, XML, plain text) using a natural language prompt.
|
||||
55
examples/mistral/scrape_plain_text_mistral.py
Normal file
55
examples/mistral/scrape_plain_text_mistral.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper from text
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the text file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/plain_html_example.txt"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
# The text could also come from an HTTP request (e.g. made with the requests module)
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
source=text,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
46
examples/mistral/script_generator_mistral.py
Normal file
46
examples/mistral/script_generator_mistral.py
Normal file
@ -0,0 +1,46 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using ScriptCreatorGraph
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"library": "beautifulsoup"
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
script_creator_graph = ScriptCreatorGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = script_creator_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = script_creator_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
62
examples/mistral/script_generator_schema_mistral.py
Normal file
62
examples/mistral/script_generator_schema_mistral.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using ScriptCreatorGraph with schema
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Project(BaseModel):
|
||||
title: str = Field(description="The title of the project")
|
||||
description: str = Field(description="The description of the project")
|
||||
|
||||
class Projects(BaseModel):
|
||||
projects: List[Project]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"library": "beautifulsoup",
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
script_creator_graph = ScriptCreatorGraph(
|
||||
prompt="List me all the projects with their description.",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects",
|
||||
config=graph_config,
|
||||
schema=Projects
|
||||
)
|
||||
|
||||
result = script_creator_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = script_creator_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
54
examples/mistral/script_multi_generator_mistral.py
Normal file
54
examples/mistral/script_multi_generator_mistral.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using ScriptCreatorMultiGraph
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import ScriptCreatorMultiGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"library": "beautifulsoup",
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Define the source URLs for the graph
|
||||
# ************************************************
|
||||
|
||||
urls=[
|
||||
"https://perinim.github.io/",
|
||||
"https://perinim.github.io/cv/"
|
||||
]
|
||||
|
||||
# ************************************************
|
||||
# Create the ScriptCreatorMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
script_creator_graph = ScriptCreatorMultiGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source=urls,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = script_creator_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = script_creator_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
35
examples/mistral/search_graph_mistral.py
Normal file
35
examples/mistral/search_graph_mistral.py
Normal file
@ -0,0 +1,35 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"max_results": 2,
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SearchGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
search_graph = SearchGraph(
|
||||
prompt="List me Chioggia's famous dishes",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
62
examples/mistral/search_graph_schema_mistral.py
Normal file
62
examples/mistral/search_graph_schema_mistral.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""
|
||||
Example of Search Graph
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import SearchGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Dish(BaseModel):
|
||||
name: str = Field(description="The name of the dish")
|
||||
description: str = Field(description="The description of the dish")
|
||||
|
||||
class Dishes(BaseModel):
|
||||
dishes: List[Dish]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"max_results": 2,
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SearchGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
search_graph = SearchGraph(
|
||||
prompt="List me Chioggia's famous dishes",
|
||||
config=graph_config,
|
||||
schema=Dishes
|
||||
)
|
||||
|
||||
result = search_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = search_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
43
examples/mistral/search_link_graph_mistral.py
Normal file
43
examples/mistral/search_link_graph_mistral.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SearchLinkGraph
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SearchLinkGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SearchLinkGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SearchLinkGraph(
|
||||
source="https://sport.sky.it/nba?gr=www",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
43
examples/mistral/smart_scraper_mistral.py
Normal file
43
examples/mistral/smart_scraper_mistral.py
Normal file
@ -0,0 +1,43 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
|
||||
import os, json
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": os.getenv("MISTRAL_API_KEY"),
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me what does the company do, the name and a contact email.",
|
||||
source="https://scrapegraphai.com/",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
42
examples/mistral/smart_scraper_multi_mistral.py
Normal file
42
examples/mistral/smart_scraper_multi_mistral.py
Normal file
@ -0,0 +1,42 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraperMultiGraph
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperMultiGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
}
|
||||
|
||||
# *******************************************************
|
||||
# Create the SmartScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = SmartScraperMultiGraph(
|
||||
prompt="Who is Marco Perini?",
|
||||
source= [
|
||||
"https://perinim.github.io/",
|
||||
"https://perinim.github.io/cv/"
|
||||
],
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
51
examples/mistral/smart_scraper_schema_mistral.py
Normal file
51
examples/mistral/smart_scraper_schema_mistral.py
Normal file
@ -0,0 +1,51 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper with schema
|
||||
"""
|
||||
|
||||
import os, json
|
||||
from typing import List
|
||||
from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the output schema for the graph
|
||||
# ************************************************
|
||||
|
||||
class Project(BaseModel):
|
||||
title: str = Field(description="The title of the project")
|
||||
description: str = Field(description="The description of the project")
|
||||
|
||||
class Projects(BaseModel):
|
||||
projects: List[Project]
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key":mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
source="https://perinim.github.io/projects/",
|
||||
schema=Projects,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
57
examples/mistral/speech_graph_mistral.py
Normal file
57
examples/mistral/speech_graph_mistral.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SpeechGraph
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SpeechGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define audio output path
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "website_summary.mp3"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
output_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
"temperature": 0.7,
|
||||
},
|
||||
"tts_model": {
|
||||
"api_key": mistral_key,
|
||||
"model": "tts-1",
|
||||
"voice": "alloy"
|
||||
},
|
||||
"output_path": output_path,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the SpeechGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
speech_graph = SpeechGraph(
|
||||
prompt="Make a detailed audio summary of the projects.",
|
||||
source="https://perinim.github.io/projects/",
|
||||
config=graph_config,
|
||||
)
|
||||
|
||||
result = speech_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = speech_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
59
examples/mistral/xml_scraper_graph_multi_mistral.py
Normal file
59
examples/mistral/xml_scraper_graph_multi_mistral.py
Normal file
@ -0,0 +1,59 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the XML file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/books.xml"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key":mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
}
|
||||
# ************************************************
|
||||
# Create the XMLScraperMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
xml_scraper_graph = XMLScraperMultiGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=[text, text], # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = xml_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = xml_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
59
examples/mistral/xml_scraper_mistral.py
Normal file
59
examples/mistral/xml_scraper_mistral.py
Normal file
@ -0,0 +1,59 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperGraph from XML documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the XML file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/books.xml"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
mistral_key = os.getenv("MISTRAL_API_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": mistral_key,
|
||||
"model": "mistral/open-mistral-nemo",
|
||||
},
|
||||
"verbose":False,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the XMLScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
xml_scraper_graph = XMLScraperGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=text, # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = xml_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = xml_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json and csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
|
||||
@ -191,7 +191,7 @@ class AbstractGraph(ABC):
|
||||
|
||||
if llm_params["model"].startswith("mistral"):
|
||||
model_name = llm_params["model"].split("/")[-1]
|
||||
return handle_model(model_name, "mistral", model_name)
|
||||
return handle_model(model_name, "mistralai", model_name)
|
||||
|
||||
# Instantiate the language model based on the model name (models that do not use the common interface)
|
||||
if "deepseek" in llm_params["model"]:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user