mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
Added bedrock examples
This commit is contained in:
parent
f1a25233d6
commit
7e5ff4e410
4
examples/bedrock/.env.example
Normal file
4
examples/bedrock/.env.example
Normal file
@ -0,0 +1,4 @@
|
||||
AWS_ACCESS_KEY_ID="..."
|
||||
AWS_SECRET_ACCESS_KEY="..."
|
||||
AWS_SESSION_TOKEN="..."
|
||||
AWS_DEFAULT_REGION="..."
|
||||
3
examples/bedrock/README.md
Normal file
3
examples/bedrock/README.md
Normal file
@ -0,0 +1,3 @@
|
||||
This folder contains examples of how to use ScrapeGraphAI with [Amazon Bedrock](https://aws.amazon.com/bedrock/) ⛰️. The examples show how to extract information from websites and files using a natural language prompt.
|
||||
|
||||

|
||||
63
examples/bedrock/csv_scraper_bedrock.py
Normal file
63
examples/bedrock/csv_scraper_bedrock.py
Normal file
@ -0,0 +1,63 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""

import os
import json

from dotenv import load_dotenv

import pandas as pd

from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Load environment variables from a local .env file
# (see .env.example for the AWS variables this folder expects).
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

# The input path is resolved relative to this script, not the working directory.
FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

# inputs/username.csv is semicolon-delimited and has a space after the first
# delimiter in its header ("Username; Identifier;..."). With the default comma
# separator every row would collapse into one mis-named column, so the
# delimiter is stated explicitly and the stray leading space is skipped.
text = pd.read_csv(file_path, sep=";", skipinitialspace=True)

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0
    },
    "embeddings": {
        "model": "bedrock/cohere.embed-multilingual-v3"
    }
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),  # Pass the table rendered as text, not the DataFrame object
    config=graph_config
)

result = csv_scraper_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json and csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
127
examples/bedrock/custom_graph_bedrock.py
Normal file
127
examples/bedrock/custom_graph_bedrock.py
Normal file
@ -0,0 +1,127 @@
"""
Example of custom graph using existing nodes
"""

import json

from dotenv import load_dotenv

from langchain_aws import BedrockEmbeddings
from scrapegraphai.models import Bedrock
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode,
    RobotsNode
)

# Load environment variables from a local .env file
# (see .env.example for the AWS variables this folder expects).
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
    "llm": {
        "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
        "temperature": 0.0
    },
    "embeddings": {
        "model": "bedrock/cohere.embed-multilingual-v3"
    }
}

# ************************************************
# Define the graph nodes
# ************************************************

# Model names in graph_config carry a "bedrock/" prefix; only the part after
# the last "/" is passed on as the model id. The temperature is read from
# graph_config as well, so the config stays the single source of truth.
llm_model = Bedrock({
    'model_id': graph_config["llm"]["model"].split("/")[-1],
    'model_kwargs': {
        'temperature': graph_config["llm"]["temperature"]
    }})
embedder = BedrockEmbeddings(model_id=graph_config["embeddings"]["model"].split("/")[-1])

# Define the nodes for the graph
robot_node = RobotsNode(
    input="url",
    output=["is_scrapable"],
    node_config={
        "llm_model": llm_model,
        "force_scraping": True,
        "verbose": True,
    }
)

fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc", "link_urls", "img_urls"],
    node_config={
        "verbose": True,
        "headless": True,
    }
)

parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={
        "chunk_size": 4096,
        "verbose": True,
    }
)

rag_node = RAGNode(
    input="user_prompt & (parsed_doc | doc)",
    output=["relevant_chunks"],
    node_config={
        "llm_model": llm_model,
        "embedder_model": embedder,
        "verbose": True,
    }
)

generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={
        "llm_model": llm_model,
        "verbose": True,
    }
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

# Linear pipeline: robots check -> fetch -> parse -> RAG -> answer generation.
graph = BaseGraph(
    nodes=[
        robot_node,
        fetch_node,
        parse_node,
        rag_node,
        generate_answer_node,
    ],
    edges=[
        (robot_node, fetch_node),
        (fetch_node, parse_node),
        (parse_node, rag_node),
        (rag_node, generate_answer_node)
    ],
    entry_point=robot_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
    "user_prompt": "List me all the articles",
    "url": "https://perinim.github.io/projects"
})

# Get the answer from the result, falling back to a placeholder message
# when the final state has no "answer" key.
result = result.get("answer", "No answer found.")
print(json.dumps(result, indent=4))
120
examples/bedrock/inputs/books.xml
Normal file
120
examples/bedrock/inputs/books.xml
Normal file
@ -0,0 +1,120 @@
|
||||
<?xml version="1.0"?>
|
||||
<catalog>
|
||||
<book id="bk101">
|
||||
<author>Gambardella, Matthew</author>
|
||||
<title>XML Developer's Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>44.95</price>
|
||||
<publish_date>2000-10-01</publish_date>
|
||||
<description>An in-depth look at creating applications
|
||||
with XML.</description>
|
||||
</book>
|
||||
<book id="bk102">
|
||||
<author>Ralls, Kim</author>
|
||||
<title>Midnight Rain</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-12-16</publish_date>
|
||||
<description>A former architect battles corporate zombies,
|
||||
an evil sorceress, and her own childhood to become queen
|
||||
of the world.</description>
|
||||
</book>
|
||||
<book id="bk103">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Maeve Ascendant</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2000-11-17</publish_date>
|
||||
<description>After the collapse of a nanotechnology
|
||||
society in England, the young survivors lay the
|
||||
foundation for a new society.</description>
|
||||
</book>
|
||||
<book id="bk104">
|
||||
<author>Corets, Eva</author>
|
||||
<title>Oberon's Legacy</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-03-10</publish_date>
|
||||
<description>In post-apocalypse England, the mysterious
|
||||
agent known only as Oberon helps to create a new life
|
||||
for the inhabitants of London. Sequel to Maeve
|
||||
Ascendant.</description>
|
||||
</book>
|
||||
<book id="bk105">
|
||||
<author>Corets, Eva</author>
|
||||
<title>The Sundered Grail</title>
|
||||
<genre>Fantasy</genre>
|
||||
<price>5.95</price>
|
||||
<publish_date>2001-09-10</publish_date>
|
||||
<description>The two daughters of Maeve, half-sisters,
|
||||
battle one another for control of England. Sequel to
|
||||
Oberon's Legacy.</description>
|
||||
</book>
|
||||
<book id="bk106">
|
||||
<author>Randall, Cynthia</author>
|
||||
<title>Lover Birds</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-09-02</publish_date>
|
||||
<description>When Carla meets Paul at an ornithology
|
||||
conference, tempers fly as feathers get ruffled.</description>
|
||||
</book>
|
||||
<book id="bk107">
|
||||
<author>Thurman, Paula</author>
|
||||
<title>Splish Splash</title>
|
||||
<genre>Romance</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>A deep sea diver finds true love twenty
|
||||
thousand leagues beneath the sea.</description>
|
||||
</book>
|
||||
<book id="bk108">
|
||||
<author>Knorr, Stefan</author>
|
||||
<title>Creepy Crawlies</title>
|
||||
<genre>Horror</genre>
|
||||
<price>4.95</price>
|
||||
<publish_date>2000-12-06</publish_date>
|
||||
<description>An anthology of horror stories about roaches,
|
||||
centipedes, scorpions and other insects.</description>
|
||||
</book>
|
||||
<book id="bk109">
|
||||
<author>Kress, Peter</author>
|
||||
<title>Paradox Lost</title>
|
||||
<genre>Science Fiction</genre>
|
||||
<price>6.95</price>
|
||||
<publish_date>2000-11-02</publish_date>
|
||||
<description>After an inadvertent trip through a Heisenberg
|
||||
Uncertainty Device, James Salway discovers the problems
|
||||
of being quantum.</description>
|
||||
</book>
|
||||
<book id="bk110">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>Microsoft .NET: The Programming Bible</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-09</publish_date>
|
||||
<description>Microsoft's .NET initiative is explored in
|
||||
detail in this deep programmer's reference.</description>
|
||||
</book>
|
||||
<book id="bk111">
|
||||
<author>O'Brien, Tim</author>
|
||||
<title>MSXML3: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>36.95</price>
|
||||
<publish_date>2000-12-01</publish_date>
|
||||
<description>The Microsoft MSXML3 parser is covered in
|
||||
detail, with attention to XML DOM interfaces, XSLT processing,
|
||||
SAX and more.</description>
|
||||
</book>
|
||||
<book id="bk112">
|
||||
<author>Galos, Mike</author>
|
||||
<title>Visual Studio 7: A Comprehensive Guide</title>
|
||||
<genre>Computer</genre>
|
||||
<price>49.95</price>
|
||||
<publish_date>2001-04-16</publish_date>
|
||||
<description>Microsoft Visual Studio 7 is explored in depth,
|
||||
looking at how Visual Basic, Visual C++, C#, and ASP+ are
|
||||
integrated into a comprehensive development
|
||||
environment.</description>
|
||||
</book>
|
||||
</catalog>
|
||||
38
examples/bedrock/inputs/example.json
Normal file
38
examples/bedrock/inputs/example.json
Normal file
@ -0,0 +1,38 @@
|
||||
{
|
||||
"quiz": {
|
||||
"sport": {
|
||||
"q1": {
|
||||
"question": "Which one is correct team name in NBA?",
|
||||
"options": [
|
||||
"New York Bulls",
|
||||
"Los Angeles Kings",
|
||||
"Golden State Warriros",
|
||||
"Huston Rocket"
|
||||
],
|
||||
"answer": "Huston Rocket"
|
||||
}
|
||||
},
|
||||
"maths": {
|
||||
"q1": {
|
||||
"question": "5 + 7 = ?",
|
||||
"options": [
|
||||
"10",
|
||||
"11",
|
||||
"12",
|
||||
"13"
|
||||
],
|
||||
"answer": "12"
|
||||
},
|
||||
"q2": {
|
||||
"question": "12 - 8 = ?",
|
||||
"options": [
|
||||
"1",
|
||||
"2",
|
||||
"3",
|
||||
"4"
|
||||
],
|
||||
"answer": "4"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
105
examples/bedrock/inputs/plain_html_example.txt
Normal file
105
examples/bedrock/inputs/plain_html_example.txt
Normal file
@ -0,0 +1,105 @@
|
||||
<body class="fixed-top-nav " style="padding-top: 57px;">
|
||||
<header>
|
||||
<nav id="navbar" class="navbar navbar-light navbar-expand-sm fixed-top">
|
||||
<div class="container">
|
||||
<a class="navbar-brand title font-weight-lighter" href="/"><span class="font-weight-bold">Marco </span>Perini</a> <button class="navbar-toggler collapsed ml-auto" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation"> <span class="sr-only">Toggle navigation</span> <span class="icon-bar top-bar"></span> <span class="icon-bar middle-bar"></span> <span class="icon-bar bottom-bar"></span> </button>
|
||||
<div class="collapse navbar-collapse text-right" id="navbarNav">
|
||||
<ul class="navbar-nav ml-auto flex-nowrap">
|
||||
<li class="nav-item "> <a class="nav-link" href="/">About</a> </li>
|
||||
<li class="nav-item dropdown active">
|
||||
<a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">Projects<span class="sr-only">(current)</span></a>
|
||||
<div class="dropdown-menu dropdown-menu-right" aria-labelledby="navbarDropdown">
|
||||
<a class="dropdown-item" href="/projects/">Projects</a>
|
||||
<div class="dropdown-divider"></div>
|
||||
<a class="dropdown-item" href="/competitions/">Competitions</a>
|
||||
</div>
|
||||
</li>
|
||||
<li class="nav-item "> <a class="nav-link" href="/cv/">CV</a> </li>
|
||||
<li class="toggle-container"> <button id="light-toggle" title="Change theme"> <i class="fa-solid fa-moon"></i> <i class="fa-solid fa-sun"></i> </button> </li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</nav>
|
||||
<progress id="progress" value="0" max="284" style="top: 57px;">
|
||||
<div class="progress-container"> <span class="progress-bar"></span> </div>
|
||||
</progress>
|
||||
</header>
|
||||
<div class="container mt-5">
|
||||
<div class="post">
|
||||
<header class="post-header">
|
||||
<h1 class="post-title">Projects</h1>
|
||||
<p class="post-description"></p>
|
||||
</header>
|
||||
<article>
|
||||
<div class="projects">
|
||||
<div class="grid" style="position: relative; height: 861.992px;">
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 0px;">
|
||||
<a href="/projects/rotary-pendulum-rl/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/rotary_pybullet.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Rotary Pendulum RL</h4>
|
||||
<p class="card-text">Open Source project aimed at controlling a real life rotary pendulum using RL algorithms</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 0px;">
|
||||
<a href="https://github.com/PeriniM/DQN-SwingUp" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/value-policy-heatmaps.jpg" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">DQN Implementation from scratch</h4>
|
||||
<p class="card-text">Developed a Deep Q-Network algorithm to train a simple and double pendulum</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 0px; top: 447.414px;">
|
||||
<a href="https://github.com/PeriniM/Multi-Agents-HAED" rel="external nofollow noopener" target="_blank">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/multi_agents_haed.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Multi Agents HAED</h4>
|
||||
<p class="card-text">University project which focuses on simulating a multi-agent system to perform environment mapping. Agents, equipped with sensors, explore and record their surroundings, considering uncertainties in their readings.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
<div class="grid-sizer"></div>
|
||||
<div class="grid-item" style="position: absolute; left: 260px; top: 370.172px;">
|
||||
<a href="/projects/wireless-esc-drone/">
|
||||
<div class="card hoverable">
|
||||
<figure>
|
||||
<picture> <img src="/assets/img/wireless_esc.gif" width="auto" height="auto" alt="project thumbnail" onerror="this.onerror=null; $('.responsive-img-srcset').remove();"> </picture>
|
||||
</figure>
|
||||
<div class="card-body">
|
||||
<h4 class="card-title">Wireless ESC for Modular Drones</h4>
|
||||
<p class="card-text">Modular drone architecture proposal and proof of concept. The project received maximum grade.</p>
|
||||
<div class="row ml-1 mr-1 p-0"> </div>
|
||||
</div>
|
||||
</div>
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
</div>
|
||||
<footer class="fixed-bottom">
|
||||
<div class="container mt-0"> © Copyright 2023 Marco Perini. Powered by <a href="https://jekyllrb.com/" target="_blank" rel="external nofollow noopener">Jekyll</a> with <a href="https://github.com/alshedivat/al-folio" rel="external nofollow noopener" target="_blank">al-folio</a> theme. Hosted by <a href="https://pages.github.com/" target="_blank" rel="external nofollow noopener">GitHub Pages</a>. </div>
|
||||
</footer>
|
||||
<div class="hiddendiv common"></div>
|
||||
</body>
|
||||
6
examples/bedrock/inputs/username.csv
Normal file
6
examples/bedrock/inputs/username.csv
Normal file
@ -0,0 +1,6 @@
|
||||
Username; Identifier;First name;Last name
|
||||
booker12;9012;Rachel;Booker
|
||||
grey07;2070;Laura;Grey
|
||||
johnson81;4081;Craig;Johnson
|
||||
jenkins46;9346;Mary;Jenkins
|
||||
smith79;5079;Jamie;Smith
|
||||
|
63
examples/bedrock/json_scraper_bedrock.py
Normal file
63
examples/bedrock/json_scraper_bedrock.py
Normal file
@ -0,0 +1,63 @@
"""
Example: query a local JSON document with JSONScraperGraph and Bedrock models
"""

import os
import json

from dotenv import load_dotenv

from scrapegraphai.graphs import JSONScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Load the JSON document
# ************************************************

# The input is located relative to this script, not the working directory.
FILE_NAME = "inputs/example.json"
file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), FILE_NAME)

with open(file_path, 'r', encoding="utf-8") as json_file:
    text = json_file.read()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0
}
embeddings_settings = {
    "model": "bedrock/cohere.embed-multilingual-v3"
}
graph_config = {"llm": llm_settings, "embeddings": embeddings_settings}

# ************************************************
# Build and run the JSONScraperGraph
# ************************************************

json_scraper_graph = JSONScraperGraph(
    prompt="List me all questions and options, no answers.",
    source=text,  # the file's text content, not the file handle
    config=graph_config
)

result = json_scraper_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(json_scraper_graph.get_execution_info()))

# Save the result to both csv and json
convert_to_csv(result, "result")
convert_to_json(result, "result")
59
examples/bedrock/scrape_plain_text_bedrock.py
Normal file
59
examples/bedrock/scrape_plain_text_bedrock.py
Normal file
@ -0,0 +1,59 @@
"""
Example: run SmartScraperGraph on HTML that was already saved as plain text
"""

import os
import json

from dotenv import load_dotenv

from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Load the saved HTML text
# ************************************************

# The input is located relative to this script, not the working directory.
FILE_NAME = "inputs/plain_html_example.txt"
file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), FILE_NAME)

# The text could equally come from an HTTP request instead of a file.
with open(file_path, 'r', encoding="utf-8") as html_file:
    text = html_file.read()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0
}
embeddings_settings = {
    "model": "bedrock/cohere.embed-multilingual-v3"
}
graph_config = {"llm": llm_settings, "embeddings": embeddings_settings}

# ************************************************
# Build and run the SmartScraperGraph
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    source=text,
    config=graph_config
)

result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))
BIN
examples/bedrock/scrapegraphai_bedrock.png
Normal file
BIN
examples/bedrock/scrapegraphai_bedrock.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 80 KiB |
47
examples/bedrock/script_generator_bedrock.py
Normal file
47
examples/bedrock/script_generator_bedrock.py
Normal file
@ -0,0 +1,47 @@
"""
Example: run ScriptCreatorGraph against a web page using Bedrock models
"""

from dotenv import load_dotenv

from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0
}
embeddings_settings = {
    "model": "bedrock/cohere.embed-multilingual-v3"
}
graph_config = {
    "llm": llm_settings,
    "embeddings": embeddings_settings,
    "library": "beautifulsoup"
}

# ************************************************
# Build and run the ScriptCreatorGraph
# ************************************************

script_creator_graph = ScriptCreatorGraph(
    prompt="List me all the projects with their description.",
    # a string holding already-downloaded HTML is accepted here as well
    source="https://perinim.github.io/projects",
    config=graph_config
)

result = script_creator_graph.run()
print(result)

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(script_creator_graph.get_execution_info()))
46
examples/bedrock/search_graph_bedrock.py
Normal file
46
examples/bedrock/search_graph_bedrock.py
Normal file
@ -0,0 +1,46 @@
"""
Example: answer a prompt with SearchGraph using Bedrock models
"""

from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0
}
embeddings_settings = {
    "model": "bedrock/amazon.titan-embed-text-v2:0"
}
graph_config = {"llm": llm_settings, "embeddings": embeddings_settings}

# ************************************************
# Build and run the SearchGraph
# ************************************************

search_graph = SearchGraph(
    prompt="List me Chioggia's famous dishes",
    config=graph_config
)

result = search_graph.run()
print(result)

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(search_graph.get_execution_info()))

# Save the result to both csv and json
convert_to_csv(result, "result")
convert_to_json(result, "result")
@ -1,42 +1,47 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
Smartscraper example on bedrock
|
||||
"""
|
||||
import boto3
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperGraph
|
||||
from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
# 0a. Initialize session
|
||||
# If not required delete it
|
||||
session = boto3.Session(
|
||||
aws_access_key_id="...",
|
||||
aws_secret_access_key="...",
|
||||
aws_session_token="...",
|
||||
region_name="us-east-1"
|
||||
)
|
||||
load_dotenv()
|
||||
|
||||
# 0b. Initialize client
|
||||
client = session.client("bedrock-runtime")
|
||||
|
||||
# 1. Define graph configuration
|
||||
config = {
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"client": client,
|
||||
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
"temperature": 0.0,
|
||||
"format": "json"
|
||||
},
|
||||
"embeddings": {
|
||||
"client": client,
|
||||
"model": "bedrock/cohere.embed-multilingual-v3",
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-4o",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
}
|
||||
|
||||
# 2. Create graph instance
|
||||
graph = SmartScraperGraph(
|
||||
prompt="List me all the articles",
|
||||
source="https://perinim.github.io/projects",
|
||||
config=config
|
||||
# ************************************************
|
||||
# Create the SmartScraperGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/",
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
# 3. Scrape away!
|
||||
print(graph.run())
|
||||
result = smart_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = smart_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
62
examples/bedrock/xml_scraper_bedrock.py
Normal file
62
examples/bedrock/xml_scraper_bedrock.py
Normal file
@ -0,0 +1,62 @@
"""
Example: query a local XML document with XMLScraperGraph and Bedrock models
"""

import os
import json

from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Load the XML document
# ************************************************

# The input is located relative to this script, not the working directory.
FILE_NAME = "inputs/books.xml"
file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), FILE_NAME)

with open(file_path, 'r', encoding="utf-8") as xml_file:
    text = xml_file.read()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0
}
embeddings_settings = {
    "model": "bedrock/cohere.embed-multilingual-v3"
}
graph_config = {"llm": llm_settings, "embeddings": embeddings_settings}

# ************************************************
# Build and run the XMLScraperGraph
# ************************************************

xml_scraper_graph = XMLScraperGraph(
    prompt="List me all the authors, title and genres of the books. Skip the preamble.",
    source=text,  # the file's text content, not the file handle
    config=graph_config
)

result = xml_scraper_graph.run()
print(json.dumps(result, indent=4))

# ************************************************
# Report execution info
# ************************************************

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Save the result to both csv and json
convert_to_csv(result, "result")
convert_to_json(result, "result")
Loading…
Reference in New Issue
Block a user