Merge pull request #605 from ScrapeGraphAI/togheter_ai_integration

feat: add togetherai
This commit is contained in:
Federico Aguzzi 2024-08-28 22:42:51 +02:00 committed by GitHub
commit 5f604d1341
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 1215 additions and 6 deletions

View File

@ -36,7 +36,7 @@ By the way if you to use not mandatory modules it is necessary to install by you
### Installing "Other Language Models"
This group allows you to use additional language models like Fireworks, Groq, Anthropic, Hugging Face, and Nvidia AI Endpoints.
This group allows you to use additional language models like Fireworks, Groq, Anthropic, Together AI, Hugging Face, and Nvidia AI Endpoints.
```bash
pip install scrapegraphai[other-language-models]

View File

@ -0,0 +1 @@
TOGETHER_APIKEY="your api key"

View File

@ -0,0 +1,57 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""
import os

import pandas as pd
from dotenv import load_dotenv
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

# The sample file separates its fields with semicolons
# ("Username; Identifier;First name;Last name"). With pandas' default comma
# separator every row would be read as one single column, so the delimiter
# is given explicitly. skipinitialspace drops the blank that follows a
# delimiter (e.g. before "Identifier" in the header).
text = pd.read_csv(file_path, sep=";", skipinitialspace=True)

# ************************************************
# Define the configuration for the graph
# ************************************************

# The key is read from the environment (populated from .env by load_dotenv).
together_key = os.getenv("TOGETHER_APIKEY")

graph_config = {
    "llm": {
        "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "api_key": together_key,
    },
    "verbose": True,
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    # Two sources, each the string rendering of the DataFrame (not a path).
    source=[str(text), str(text)],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,57 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""
import os

import pandas as pd
from dotenv import load_dotenv
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

# The sample file separates its fields with semicolons
# ("Username; Identifier;First name;Last name"). With pandas' default comma
# separator every row would be read as one single column, so the delimiter
# is given explicitly. skipinitialspace drops the blank that follows a
# delimiter (e.g. before "Identifier" in the header).
text = pd.read_csv(file_path, sep=";", skipinitialspace=True)

# ************************************************
# Define the configuration for the graph
# ************************************************

# The key is read from the environment (populated from .env by load_dotenv).
together_key = os.getenv("TOGETHER_APIKEY")

graph_config = {
    "llm": {
        "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "api_key": together_key,
    },
    "verbose": True,
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
    prompt="List me all the last names",
    source=str(text),  # Pass the content of the file, not the file object
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,120 @@
<?xml version="1.0"?>
<catalog>
<book id="bk101">
<author>Gambardella, Matthew</author>
<title>XML Developer's Guide</title>
<genre>Computer</genre>
<price>44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102">
<author>Ralls, Kim</author>
<title>Midnight Rain</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
<book id="bk103">
<author>Corets, Eva</author>
<title>Maeve Ascendant</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2000-11-17</publish_date>
<description>After the collapse of a nanotechnology
society in England, the young survivors lay the
foundation for a new society.</description>
</book>
<book id="bk104">
<author>Corets, Eva</author>
<title>Oberon's Legacy</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-03-10</publish_date>
<description>In post-apocalypse England, the mysterious
agent known only as Oberon helps to create a new life
for the inhabitants of London. Sequel to Maeve
Ascendant.</description>
</book>
<book id="bk105">
<author>Corets, Eva</author>
<title>The Sundered Grail</title>
<genre>Fantasy</genre>
<price>5.95</price>
<publish_date>2001-09-10</publish_date>
<description>The two daughters of Maeve, half-sisters,
battle one another for control of England. Sequel to
Oberon's Legacy.</description>
</book>
<book id="bk106">
<author>Randall, Cynthia</author>
<title>Lover Birds</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-09-02</publish_date>
<description>When Carla meets Paul at an ornithology
conference, tempers fly as feathers get ruffled.</description>
</book>
<book id="bk107">
<author>Thurman, Paula</author>
<title>Splish Splash</title>
<genre>Romance</genre>
<price>4.95</price>
<publish_date>2000-11-02</publish_date>
<description>A deep sea diver finds true love twenty
thousand leagues beneath the sea.</description>
</book>
<book id="bk108">
<author>Knorr, Stefan</author>
<title>Creepy Crawlies</title>
<genre>Horror</genre>
<price>4.95</price>
<publish_date>2000-12-06</publish_date>
<description>An anthology of horror stories about roaches,
centipedes, scorpions and other insects.</description>
</book>
<book id="bk109">
<author>Kress, Peter</author>
<title>Paradox Lost</title>
<genre>Science Fiction</genre>
<price>6.95</price>
<publish_date>2000-11-02</publish_date>
<description>After an inadvertant trip through a Heisenberg
Uncertainty Device, James Salway discovers the problems
of being quantum.</description>
</book>
<book id="bk110">
<author>O'Brien, Tim</author>
<title>Microsoft .NET: The Programming Bible</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-09</publish_date>
<description>Microsoft's .NET initiative is explored in
detail in this deep programmer's reference.</description>
</book>
<book id="bk111">
<author>O'Brien, Tim</author>
<title>MSXML3: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>36.95</price>
<publish_date>2000-12-01</publish_date>
<description>The Microsoft MSXML3 parser is covered in
detail, with attention to XML DOM interfaces, XSLT processing,
SAX and more.</description>
</book>
<book id="bk112">
<author>Galos, Mike</author>
<title>Visual Studio 7: A Comprehensive Guide</title>
<genre>Computer</genre>
<price>49.95</price>
<publish_date>2001-04-16</publish_date>
<description>Microsoft Visual Studio 7 is explored in depth,
looking at how Visual Basic, Visual C++, C#, and ASP+ are
integrated into a comprehensive development
environment.</description>
</book>
</catalog>

View File

@ -0,0 +1,182 @@
{
"kind":"youtube#searchListResponse",
"etag":"q4ibjmYp1KA3RqMF4jFLl6PBwOg",
"nextPageToken":"CAUQAA",
"regionCode":"NL",
"pageInfo":{
"totalResults":1000000,
"resultsPerPage":5
},
"items":[
{
"kind":"youtube#searchResult",
"etag":"QCsHBifbaernVCbLv8Cu6rAeaDQ",
"id":{
"kind":"youtube#video",
"videoId":"TvWDY4Mm5GM"
},
"snippet":{
"publishedAt":"2023-07-24T14:15:01Z",
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
"title":"3 Football Clubs Kylian Mbappe Should Avoid Signing ✍️❌⚽️ #football #mbappe #shorts",
"description":"",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/TvWDY4Mm5GM/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"FC Motivate",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T14:15:01Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"0NG5QHdtIQM_V-DBJDEf-jK_Y9k",
"id":{
"kind":"youtube#video",
"videoId":"aZM_42CcNZ4"
},
"snippet":{
"publishedAt":"2023-07-24T16:09:27Z",
"channelId":"UCM5gMM_HqfKHYIEJ3lstMUA",
"title":"Which Football Club Could Cristiano Ronaldo Afford To Buy? 💰",
"description":"Sign up to Sorare and get a FREE card: https://sorare.pxf.io/NellisShorts Give Soraredata a go for FREE: ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/aZM_42CcNZ4/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"John Nellis",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T16:09:27Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"WbBz4oh9I5VaYj91LjeJvffrBVY",
"id":{
"kind":"youtube#video",
"videoId":"wkP3XS3aNAY"
},
"snippet":{
"publishedAt":"2023-07-24T16:00:50Z",
"channelId":"UC4EP1dxFDPup_aFLt0ElsDw",
"title":"PAULO DYBALA vs THE WORLD'S LONGEST FREEKICK WALL",
"description":"Can Paulo Dybala curl a football around the World's longest free kick wall? We met up with the World Cup winner and put him to ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/wkP3XS3aNAY/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"Shoot for Love",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T16:00:50Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"juxv_FhT_l4qrR05S1QTrb4CGh8",
"id":{
"kind":"youtube#video",
"videoId":"rJkDZ0WvfT8"
},
"snippet":{
"publishedAt":"2023-07-24T10:00:39Z",
"channelId":"UCO8qj5u80Ga7N_tP3BZWWhQ",
"title":"TOP 10 DEFENDERS 2023",
"description":"SoccerKingz https://soccerkingz.nl Use code: 'ILOVEHOF' to get 10% off. TOP 10 DEFENDERS 2023 Follow us! • Instagram ...",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/rJkDZ0WvfT8/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"Home of Football",
"liveBroadcastContent":"none",
"publishTime":"2023-07-24T10:00:39Z"
}
},
{
"kind":"youtube#searchResult",
"etag":"wtuknXTmI1txoULeH3aWaOuXOow",
"id":{
"kind":"youtube#video",
"videoId":"XH0rtu4U6SE"
},
"snippet":{
"publishedAt":"2023-07-21T16:30:05Z",
"channelId":"UCwozCpFp9g9x0wAzuFh0hwQ",
"title":"3 Things You Didn't Know About Erling Haaland ⚽️🇳🇴 #football #haaland #shorts",
"description":"",
"thumbnails":{
"default":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/default.jpg",
"width":120,
"height":90
},
"medium":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/mqdefault.jpg",
"width":320,
"height":180
},
"high":{
"url":"https://i.ytimg.com/vi/XH0rtu4U6SE/hqdefault.jpg",
"width":480,
"height":360
}
},
"channelTitle":"FC Motivate",
"liveBroadcastContent":"none",
"publishTime":"2023-07-21T16:30:05Z"
}
}
]
}

View File

@ -0,0 +1,7 @@
Username; Identifier;First name;Last name
booker12;9012;Rachel;Booker
grey07;2070;Laura;Grey
johnson81;4081;Craig;Johnson
jenkins46;9346;Mary;Jenkins
smith79;5079;Jamie;Smith
1 Username Identifier First name Last name
2 booker12 9012 Rachel Booker
3 grey07 2070 Laura Grey
4 johnson81 4081 Craig Johnson
5 jenkins46 9346 Mary Jenkins
6 smith79 5079 Jamie Smith

View File

@ -0,0 +1,38 @@
"""
Example showing how JSONScraperMultiGraph works on multiple JSON sources
"""
import json
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import JSONScraperMultiGraph

load_dotenv()

# ************************************************
# Graph configuration (Together AI model + key from the environment)
# ************************************************

llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ************************************************
# Load the JSON document that sits next to this script
# ************************************************

FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))
input_path = os.path.join(script_dir, FILE_NAME)

with open(input_path, encoding="utf-8") as handle:
    json_text = handle.read()

# The same document is submitted twice to exercise the multi-source graph.
sources = [json_text] * 2

# ************************************************
# Build the JSONScraperMultiGraph and run it
# ************************************************

# NOTE(review): the prompt asks about books - confirm that
# inputs/example.json actually contains book records.
multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

answer = multiple_search_graph.run()
print(json.dumps(answer, indent=4))

View File

@ -0,0 +1,57 @@
"""
Basic example of scraping pipeline using JSONScraperGraph from JSON documents
"""
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import JSONScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()

# ************************************************
# Load the JSON document that sits next to this script
# ************************************************

FILE_NAME = "inputs/example.json"
input_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

with open(input_path, encoding="utf-8") as handle:
    json_text = handle.read()

# ************************************************
# Graph configuration (Together AI model + key from the environment)
# ************************************************

llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ************************************************
# Build the JSONScraperGraph and run it
# ************************************************

# NOTE(review): the prompt asks about books - confirm that
# inputs/example.json actually contains book records.
json_scraper_graph = JSONScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    source=json_text,  # the file's content, not the file object
    config=graph_config,
)

answer = json_scraper_graph.run()
print(answer)

# ************************************************
# Print the execution info collected by the graph
# ************************************************

print(prettify_exec_info(json_scraper_graph.get_execution_info()))

# Save to json or csv
convert_to_csv(answer, "result")
convert_to_json(answer, "result")

View File

@ -0,0 +1,44 @@
"""
Basic example of scraping pipeline using PDFScraperGraph
"""
import json
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import PDFScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Graph configuration (Together AI model + key from the environment)
# ************************************************

llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ************************************************
# Text handed to the graph as its source
# ************************************************

source = """
The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian
circa 1308/21 by Dante. It is usually held to be one of the world s great works of literature.
Divided into three major sectionsInferno, Purgatorio, and Paradisothe narrative traces the journey of Dante
from darkness and error to the revelation of the divine light, culminating in the Beatific Vision of God.
Dante is guided by the Roman poet Virgil, who represents the epitome of human knowledge, from the dark wood
through the descending circles of the pit of Hell (Inferno). He then climbs the mountain of Purgatory, guided
by the Roman poet Statius, who represents the fulfilment of human knowledge, and is finally led by his lifelong love,
the Beatrice of his earlier poetry, through the celestial spheres of Paradise.
"""

# ************************************************
# Build the PDFScraperGraph and run it
# ************************************************

pdf_scraper_graph = PDFScraperGraph(
    prompt="Summarize the text and find the main topics",
    source=source,
    config=graph_config,
)

summary = pdf_scraper_graph.run()
print(json.dumps(summary, indent=4))

View File

@ -0,0 +1,74 @@
"""
Example showing how PdfScraperMultiGraph works on multiple sources
"""
import json
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import PdfScraperMultiGraph

load_dotenv()

# ************************************************
# Graph configuration (Together AI model + key from the environment)
# ************************************************

llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ************************************************
# Sources: the same abstract, submitted four times
# ************************************************

abstract = "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking."

sources = [abstract] * 4

# ************************************************
# Instructions sent along with every source
# ************************************************

prompt = """
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:
Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
Response Format: For each abstract, present your response in the following structured format:
Independent Variable (IV):
Dependent Variable (DV):
Exogenous Shock:
Example Queries and Responses:
Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.
Response:
Independent Variable (IV): Employee happiness.
Dependent Variable (DV): Overall firm productivity.
Exogenous Shock: Sudden company-wide increase in bonus payments.
Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.
Response:
Independent Variable (IV): Exposure to social media.
Dependent Variable (DV): Mental health outcomes.
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
"""

# *******************************************************
# Build the PdfScraperMultiGraph and run it
# *******************************************************

multiple_search_graph = PdfScraperMultiGraph(
    prompt=prompt,
    source=sources,
    schema=None,
    config=graph_config,
)

answer = multiple_search_graph.run()
print(json.dumps(answer, indent=4))

View File

@ -0,0 +1,55 @@
"""
Basic example of scraping pipeline using SmartScraper from text
"""
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Load the saved HTML text that sits next to this script
# ************************************************

FILE_NAME = "inputs/plain_html_example.txt"
input_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

# The text could just as well come from an HTTP request instead of a file.
with open(input_path, encoding="utf-8") as handle:
    page_text = handle.read()

# ************************************************
# Graph configuration (Together AI model + key from the environment)
# ************************************************

llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ************************************************
# Build the SmartScraperGraph and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the news with their description.",
    source=page_text,
    config=graph_config,
)

answer = smart_scraper_graph.run()
print(answer)

# ************************************************
# Print the execution info collected by the graph
# ************************************************

print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,45 @@
"""
Basic example of scraping pipeline using ScriptCreatorGraph
"""
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

# The key is read from the environment (populated from .env by load_dotenv).
together_key = os.getenv("TOGETHER_APIKEY")

graph_config = {
    "llm": {
        "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "api_key": together_key,
    },
    # Scraping library the generated script should target. The script
    # creator graphs read this key from the config, so it must be present.
    "library": "beautifulsoup",
    "verbose": True,
}

# ************************************************
# Create the ScriptCreatorGraph instance and run it
# ************************************************

script_creator_graph = ScriptCreatorGraph(
    prompt="List me all the projects with their description.",
    # also accepts a string with the already downloaded HTML code
    source="https://perinim.github.io/projects",
    config=graph_config,
)

result = script_creator_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = script_creator_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -0,0 +1,54 @@
"""
Basic example of scraping pipeline using ScriptCreatorMultiGraph
"""
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import ScriptCreatorMultiGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Graph configuration (Together AI model + key from the environment)
# ************************************************

llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "library": "beautifulsoup"}

# ************************************************
# Pages handed to the graph as sources
# ************************************************

urls = [
    "https://schultzbergagency.com/emil-raste-karlsen/",
    "https://schultzbergagency.com/johanna-hedberg/",
]

# ************************************************
# Build the ScriptCreatorMultiGraph and run it
# ************************************************

script_creator_graph = ScriptCreatorMultiGraph(
    prompt="Find information about actors",
    source=urls,
    config=graph_config,
)

generated = script_creator_graph.run()
print(generated)

# ************************************************
# Print the execution info collected by the graph
# ************************************************

print(prettify_exec_info(script_creator_graph.get_execution_info()))

View File

@ -0,0 +1,62 @@
"""
Example of Search Graph
"""
import os
from typing import List

from dotenv import load_dotenv
from pydantic import BaseModel, Field

# Kept ahead of the scrapegraphai import, as in the original ordering.
load_dotenv()

from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# ************************************************
# Output schema the graph is asked to fill in
# ************************************************


# A single dish: its name and a description.
class Dish(BaseModel):
    name: str = Field(description="The name of the dish")
    description: str = Field(description="The description of the dish")


# Top-level result: the list of dishes.
class Dishes(BaseModel):
    dishes: List[Dish]


# ************************************************
# Graph configuration (Together AI model + key from the environment)
# ************************************************

llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ************************************************
# Build the SearchGraph and run it
# ************************************************

search_graph = SearchGraph(
    prompt="List me Chioggia's famous dishes",
    config=graph_config,
    schema=Dishes,
)

answer = search_graph.run()
print(answer)

# ************************************************
# Print the execution info collected by the graph
# ************************************************

print(prettify_exec_info(search_graph.get_execution_info()))

# Save to json and csv
convert_to_csv(answer, "result")
convert_to_json(answer, "result")

View File

@ -0,0 +1,35 @@
"""
Example of Search Graph
"""
import os

from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph

load_dotenv()

# ************************************************
# Graph configuration (Together AI model + key from the environment)
# ************************************************

llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {
    "llm": llm_settings,
    "max_results": 2,
    "verbose": True,
}

# ************************************************
# Build the SearchGraph and run it
# ************************************************

search_graph = SearchGraph(
    prompt="List me the best escursions near Trento",
    config=graph_config,
)

answer = search_graph.run()
print(answer)

View File

@ -0,0 +1,46 @@
"""
Example of Search Graph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
# ************************************************
# Define the configuration for the graph
# ************************************************

# Load variables from a .env file so TOGETHER_APIKEY is available below.
load_dotenv()

together_key = os.getenv("TOGETHER_APIKEY")

graph_config = {
    "llm": {
        "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        "api_key": together_key,
    },
    "verbose": True,
}

# ************************************************
# Create the SearchGraph instance and run it
# ************************************************

search_graph = SearchGraph(
    # Spelling fixed ("excursions"): the prompt is passed on verbatim.
    prompt="List me the best excursions near Trento",
    config=graph_config,
)

result = search_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json and csv
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,41 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os, json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiGraph
load_dotenv()
# ------------------------------------------------
# Graph configuration: Together AI model, API key read from the
# TOGETHER_APIKEY environment variable
# ------------------------------------------------
llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ------------------------------------------------
# Build the SmartScraperMultiGraph over both pages and execute it
# ------------------------------------------------
multiple_search_graph = SmartScraperMultiGraph(
    prompt="Who is Marco Perini?",
    source=[
        "https://perinim.github.io/",
        "https://perinim.github.io/cv/",
    ],
    schema=None,
    config=graph_config,
)

# Pretty-print the result as indented JSON
print(json.dumps(multiple_search_graph.run(), indent=4))

View File

@ -0,0 +1,59 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os
from typing import List
from pydantic import BaseModel, Field
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ************************************************
# Define the output schema for the graph
# ************************************************
# Schema for one extracted project: a title and a description, each
# carrying a description string in its Field metadata.
class Project(BaseModel):
    title: str = Field(
        description="The title of the project",
    )
    description: str = Field(
        description="The description of the project",
    )
# Output schema handed to SmartScraperGraph below: a single field
# holding the list of Project entries.
class Projects(BaseModel):
    # One Project item per entry.
    projects: List[Project]
# ------------------------------------------------
# Graph configuration: Together AI model, API key read from the
# TOGETHER_APIKEY environment variable
# ------------------------------------------------
llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ------------------------------------------------
# Build the SmartScraperGraph with the Projects schema and execute it
# ------------------------------------------------
smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    # a string containing already-downloaded HTML is accepted as well
    source="https://perinim.github.io/projects/",
    schema=Projects,
    config=graph_config,
)
result = smart_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the graph execution info
# ------------------------------------------------
print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,46 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info
load_dotenv()
# ------------------------------------------------
# Graph configuration: Together AI model, API key read from the
# TOGETHER_APIKEY environment variable
# ------------------------------------------------
llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ------------------------------------------------
# Build the SmartScraperGraph and execute it
# ------------------------------------------------
smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description.",
    # a string containing already-downloaded HTML is accepted as well
    source="https://perinim.github.io/projects/",
    config=graph_config,
)
result = smart_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the graph execution info
# ------------------------------------------------
print(prettify_exec_info(smart_scraper_graph.get_execution_info()))

View File

@ -0,0 +1,58 @@
"""
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()
# ------------------------------------------------
# Read the XML file, resolved relative to this script's directory
# ------------------------------------------------
FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as xml_file:
    text = xml_file.read()

# ------------------------------------------------
# Graph configuration: Together AI model, API key read from the
# TOGETHER_APIKEY environment variable
# ------------------------------------------------
llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ------------------------------------------------
# Build the XMLScraperMultiGraph and execute it
# ------------------------------------------------
xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    # the same file content is supplied twice, as text rather than file objects
    source=[text, text],
    config=graph_config,
)
result = xml_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the graph execution info
# ------------------------------------------------
print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Save the result to csv and json
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -0,0 +1,60 @@
"""
Basic example of scraping pipeline using XMLScraperGraph from XML documents
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()
# ------------------------------------------------
# Read the XML file, resolved relative to this script's directory
# ------------------------------------------------
FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(script_dir, FILE_NAME), 'r', encoding="utf-8") as xml_file:
    text = xml_file.read()

# ------------------------------------------------
# Graph configuration: Together AI model, API key read from the
# TOGETHER_APIKEY environment variable
# ------------------------------------------------
llm_settings = {
    "model": "togetherai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    "api_key": os.getenv("TOGETHER_APIKEY"),
}
graph_config = {"llm": llm_settings, "verbose": True}

# ------------------------------------------------
# Build the XMLScraperGraph and execute it
# ------------------------------------------------
xml_scraper_graph = XMLScraperGraph(
    prompt="List me all the authors, title and genres of the books",
    # the file content is supplied as text, not as a file object
    source=text,
    config=graph_config,
)
result = xml_scraper_graph.run()
print(result)

# ------------------------------------------------
# Print the graph execution info
# ------------------------------------------------
print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Save the result to csv and json
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -75,6 +75,7 @@ other-language-models = [
"langchain-anthropic>=0.1.11",
"langchain-huggingface>=0.0.3",
"langchain-nvidia-ai-endpoints>=0.1.6",
"langchain_together>=1.2.9"
]
# Group 2: More Semantic Options

View File

@ -129,7 +129,7 @@ class AbstractGraph(ABC):
known_providers = {"openai", "azure_openai", "google_genai", "google_vertexai",
"ollama", "oneapi", "nvidia", "groq", "anthropic", "bedrock", "mistralai",
"hugging_face", "deepseek", "ernie", "fireworks"}
"hugging_face", "deepseek", "ernie", "fireworks", "togetherai"}
split_model_provider = llm_params["model"].split("/", 1)
llm_params["model_provider"] = split_model_provider[0]
@ -145,7 +145,7 @@ class AbstractGraph(ABC):
self.model_token = 8192
try:
if llm_params["model_provider"] not in {"oneapi", "nvidia", "ernie", "deepseek"}:
if llm_params["model_provider"] not in {"oneapi", "nvidia", "ernie", "deepseek", "togetherai"}:
with warnings.catch_warnings():
warnings.simplefilter("ignore")
return init_chat_model(**llm_params)
@ -157,14 +157,21 @@ class AbstractGraph(ABC):
from langchain_community.chat_models import ErnieBotChat
return ErnieBotChat(**llm_params)
if llm_params["model_provider"] == "oneapi":
elif llm_params["model_provider"] == "oneapi":
return OneApi(**llm_params)
if llm_params["model_provider"] == "nvidia":
elif llm_params["model_provider"] == "togetherai":
try:
from langchain_together import ChatTogether
except ImportError:
raise ImportError("The langchain_together module is not installed. Please install it using `pip install scrapegraphai[other-language-models]`.")
return ChatTogether(**llm_params)
elif llm_params["model_provider"] == "nvidia":
try:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
except ImportError:
raise ImportError("The langchain_nvidia_ai_endpoints module is not installed. Please install it using `pip install langchain_nvidia_ai_endpoints`.")
raise ImportError("The langchain_nvidia_ai_endpoints module is not installed. Please install it using `pip install scrapegraphai[other-language-models]`.")
return ChatNVIDIA(**llm_params)
except Exception as e:

View File

@ -127,6 +127,9 @@ models_tokens = {
"gemma-7b-it": 8192,
"claude-3-haiku-20240307": 8192,
},
"togetherai": {
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": 128000
},
"anthropic": {
"claude_instant": 100000,
"claude2": 9000,