mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
Merge pull request #323 from VinciGit00/refactoring-pdf_scraper
Refactoring pdf scraper and json scrape
This commit is contained in:
commit
79ace115c7
55
examples/anthropic/csv_scraper_graph_multi_haiku.py
Normal file
55
examples/anthropic/csv_scraper_graph_multi_haiku.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# Load variables from a .env file before ANTHROPIC_API_KEY is looked up below.
load_dotenv()

# ************************************************
# Load the CSV input stored next to this script
# ************************************************

FILE_NAME = "inputs/username.csv"
csv_location = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)
csv_frame = pd.read_csv(csv_location)

# The graph is handed the DataFrame's string form, not the raw file content.
csv_text = str(csv_frame)

# ************************************************
# Graph configuration (Anthropic Claude 3 Haiku)
# ************************************************

llm_settings = {
    "api_key": os.environ.get("ANTHROPIC_API_KEY"),
    "model": "claude-3-haiku-20240307",
    "max_tokens": 4000,
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the CSVScraperMultiGraph and execute it
# ************************************************

# The same document is supplied twice so the graph receives a list of sources.
csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Print the graph execution info
# ************************************************

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Export the result as CSV and as JSON.
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
36
examples/anthropic/json_scraper_multi_haiku.py
Normal file
36
examples/anthropic/json_scraper_multi_haiku.py
Normal file
@ -0,0 +1,36 @@
|
||||
"""
|
||||
Module showing how JSONScraperMultiGraph works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
# Load variables from a .env file before ANTHROPIC_API_KEY is looked up below.
load_dotenv()

# LLM settings: the API key comes from the environment.
llm_settings = {
    "api_key": os.environ.get("ANTHROPIC_API_KEY"),
    "model": "claude-3-haiku-20240307",
    "max_tokens": 4000,
}
graph_config = {"llm": llm_settings}

# The JSON input is resolved relative to this script's own directory.
FILE_NAME = "inputs/example.json"
json_location = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

with open(json_location, encoding="utf-8") as handle:
    json_text = handle.read()

# Two copies of the same document form the list of sources.
sources = [json_text] * 2

multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
@ -1,10 +1,12 @@
|
||||
"""
|
||||
Module for showing how PDFScraper multi works
|
||||
"""
|
||||
import os, json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import PDFScraperGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
72
examples/anthropic/pdf_scraper_multi_haiku.py
Normal file
72
examples/anthropic/pdf_scraper_multi_haiku.py
Normal file
@ -0,0 +1,72 @@
|
||||
"""
|
||||
Module showing how PdfScraperMultiGraph works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import PdfScraperMultiGraph
|
||||
|
||||
# Load variables from a .env file before ANTHROPIC_API_KEY is looked up below.
load_dotenv()

# LLM settings: the API key comes from the environment.
llm_settings = {
    "api_key": os.environ.get("ANTHROPIC_API_KEY"),
    "model": "claude-3-haiku-20240307",
    "max_tokens": 4000,
}
graph_config = {"llm": llm_settings}
|
||||
|
||||
# ***************
|
||||
# Convert to list
|
||||
# ***************
|
||||
|
||||
sources = [
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
]
|
||||
|
||||
prompt = """
|
||||
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:
|
||||
|
||||
Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
|
||||
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
|
||||
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
|
||||
Response Format: For each abstract, present your response in the following structured format:
|
||||
|
||||
Independent Variable (IV):
|
||||
Dependent Variable (DV):
|
||||
Exogenous Shock:
|
||||
|
||||
Example Queries and Responses:
|
||||
|
||||
Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Employee happiness.
|
||||
Dependent Variable (DV): Overall firm productivity.
|
||||
Exogenous Shock: Sudden company-wide increase in bonus payments.
|
||||
|
||||
Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Exposure to social media.
|
||||
Dependent Variable (DV): Mental health outcomes.
|
||||
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
|
||||
"""
|
||||
# *******************************************************
|
||||
# Create the PdfScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
# Every entry of `sources` is processed with the same `prompt`; no output
# schema is supplied.
pdf_multi_graph = PdfScraperMultiGraph(
    prompt=prompt,
    source=sources,
    schema=None,
    config=graph_config,
)

answer = pdf_multi_graph.run()
print(json.dumps(answer, indent=4))
|
||||
55
examples/anthropic/xml_scraper_graph_multi_haiku.py
Normal file
55
examples/anthropic/xml_scraper_graph_multi_haiku.py
Normal file
@ -0,0 +1,55 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
# Load variables from a .env file before ANTHROPIC_API_KEY is looked up below.
load_dotenv()

# ************************************************
# Load the XML input stored next to this script
# ************************************************

FILE_NAME = "inputs/books.xml"
xml_location = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

with open(xml_location, encoding="utf-8") as handle:
    xml_text = handle.read()

# ************************************************
# Graph configuration (Anthropic Claude 3 Haiku)
# ************************************************

llm_settings = {
    "api_key": os.environ.get("ANTHROPIC_API_KEY"),
    "model": "claude-3-haiku-20240307",
    "max_tokens": 4000,
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the XMLScraperMultiGraph and execute it
# ************************************************

# The graph receives the file's text (not the file object), supplied twice.
xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=[xml_text, xml_text],
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Print the graph execution info
# ************************************************

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Export the result as CSV and as JSON.
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
59
examples/bedrock/csv_scraper_graph_multi_bedrock.py
Normal file
59
examples/bedrock/csv_scraper_graph_multi_bedrock.py
Normal file
@ -0,0 +1,59 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# Load variables from a .env file into the environment.
load_dotenv()

# ************************************************
# Load the CSV input stored next to this script
# ************************************************

FILE_NAME = "inputs/username.csv"
csv_location = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)
csv_frame = pd.read_csv(csv_location)

# The graph is handed the DataFrame's string form, not the raw file content.
csv_text = str(csv_frame)

# ************************************************
# Graph configuration (Bedrock LLM and embeddings)
# ************************************************

# NOTE(review): "client_name" looks like a placeholder value - confirm what
# the library expects under the "client" key.
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
embedding_settings = {
    "model": "bedrock/cohere.embed-multilingual-v3",
}
graph_config = {
    "llm": llm_settings,
    "embeddings": embedding_settings,
}

# ************************************************
# Build the CSVScraperMultiGraph and execute it
# ************************************************

# The same document is supplied twice so the graph receives a list of sources.
csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Print the graph execution info
# ************************************************

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Export the result as CSV and as JSON.
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
59
examples/bedrock/xml_scraper_graph_multi_bedrock.py
Normal file
59
examples/bedrock/xml_scraper_graph_multi_bedrock.py
Normal file
@ -0,0 +1,59 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
# Load variables from a .env file into the environment.
load_dotenv()

# ************************************************
# Load the XML input stored next to this script
# ************************************************

FILE_NAME = "inputs/books.xml"
xml_location = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

with open(xml_location, encoding="utf-8") as handle:
    xml_text = handle.read()

# ************************************************
# Graph configuration (Bedrock LLM and embeddings)
# ************************************************

# NOTE(review): "client_name" looks like a placeholder value - confirm what
# the library expects under the "client" key.
llm_settings = {
    "client": "client_name",
    "model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
    "temperature": 0.0,
}
embedding_settings = {
    "model": "bedrock/cohere.embed-multilingual-v3",
}
graph_config = {
    "llm": llm_settings,
    "embeddings": embedding_settings,
}

# ************************************************
# Build the XMLScraperMultiGraph and execute it
# ************************************************

# The graph receives the file's text (not the file object), supplied twice.
xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=[xml_text, xml_text],
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Print the graph execution info
# ************************************************

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Export the result as CSV and as JSON.
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
58
examples/deepseek/csv_scraper_graph_multi_deepseek.py
Normal file
58
examples/deepseek/csv_scraper_graph_multi_deepseek.py
Normal file
@ -0,0 +1,58 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# Load variables from a .env file before DEEPSEEK_APIKEY is looked up below.
load_dotenv()

# ************************************************
# Load the CSV input stored next to this script
# ************************************************

FILE_NAME = "inputs/username.csv"
csv_location = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)
csv_frame = pd.read_csv(csv_location)

# The graph is handed the DataFrame's string form, not the raw file content.
csv_text = str(csv_frame)

# ************************************************
# Graph configuration (DeepSeek via OpenAI-style keys)
# ************************************************

deepseek_key = os.environ.get("DEEPSEEK_APIKEY")

llm_settings = {
    "model": "deepseek-chat",
    "openai_api_key": deepseek_key,
    "openai_api_base": 'https://api.deepseek.com/v1',
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
}

# ************************************************
# Build the CSVScraperMultiGraph and execute it
# ************************************************

# The same document is supplied twice so the graph receives a list of sources.
csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Print the graph execution info
# ************************************************

print(prettify_exec_info(csv_scraper_graph.get_execution_info()))

# Export the result as CSV and as JSON.
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
38
examples/deepseek/json_scraper_multi_deepseek.py
Normal file
38
examples/deepseek/json_scraper_multi_deepseek.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""
|
||||
Module showing how JSONScraperMultiGraph works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
# Load variables from a .env file before DEEPSEEK_APIKEY is looked up below.
load_dotenv()

deepseek_key = os.environ.get("DEEPSEEK_APIKEY")

# DeepSeek is configured through the OpenAI-style key and base-URL entries.
llm_settings = {
    "model": "deepseek-chat",
    "openai_api_key": deepseek_key,
    "openai_api_base": 'https://api.deepseek.com/v1',
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
}

# The JSON input is resolved relative to this script's own directory.
FILE_NAME = "inputs/example.json"
json_location = os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    FILE_NAME,
)

with open(json_location, encoding="utf-8") as handle:
    json_text = handle.read()

# Two copies of the same document form the list of sources.
sources = [json_text] * 2

multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
75
examples/deepseek/pdf_scraper_multi_deepseek.py
Normal file
75
examples/deepseek/pdf_scraper_multi_deepseek.py
Normal file
@ -0,0 +1,75 @@
|
||||
"""
|
||||
Module showing how PdfScraperMultiGraph works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import PdfScraperMultiGraph
|
||||
|
||||
# Load variables from a .env file before DEEPSEEK_APIKEY is looked up below.
load_dotenv()

deepseek_key = os.environ.get("DEEPSEEK_APIKEY")

# DeepSeek is configured through the OpenAI-style key and base-URL entries.
llm_settings = {
    "model": "deepseek-chat",
    "openai_api_key": deepseek_key,
    "openai_api_base": 'https://api.deepseek.com/v1',
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
}
|
||||
|
||||
# ***************
|
||||
# Convert to list
|
||||
# ***************
|
||||
|
||||
sources = [
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
]
|
||||
|
||||
prompt = """
|
||||
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:
|
||||
|
||||
Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
|
||||
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
|
||||
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
|
||||
Response Format: For each abstract, present your response in the following structured format:
|
||||
|
||||
Independent Variable (IV):
|
||||
Dependent Variable (DV):
|
||||
Exogenous Shock:
|
||||
|
||||
Example Queries and Responses:
|
||||
|
||||
Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Employee happiness.
|
||||
Dependent Variable (DV): Overall firm productivity.
|
||||
Exogenous Shock: Sudden company-wide increase in bonus payments.
|
||||
|
||||
Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Exposure to social media.
|
||||
Dependent Variable (DV): Mental health outcomes.
|
||||
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
|
||||
"""
|
||||
# *******************************************************
|
||||
# Create the PdfScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
# Every entry of `sources` is processed with the same `prompt`; no output
# schema is supplied.
pdf_multi_graph = PdfScraperMultiGraph(
    prompt=prompt,
    source=sources,
    schema=None,
    config=graph_config,
)

answer = pdf_multi_graph.run()
print(json.dumps(answer, indent=4))
|
||||
58
examples/deepseek/xml_scraper_graph_multi_deepseek.py
Normal file
58
examples/deepseek/xml_scraper_graph_multi_deepseek.py
Normal file
@ -0,0 +1,58 @@
|
||||
"""
Example pipeline: query XML documents with XMLScraperMultiGraph (DeepSeek model).

One XML file is read from disk and handed to the graph twice as a list of
sources. The script prints the answer and the execution info, then saves the
answer to csv and json.
"""

import os

from dotenv import load_dotenv

from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

# ************************************************
# Input: XML document stored next to this script
# ************************************************

FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
xml_path = os.path.join(script_dir, FILE_NAME)

with open(xml_path, "r", encoding="utf-8") as xml_file:
    xml_text = xml_file.read()

# ************************************************
# Graph configuration
# ************************************************

deepseek_key = os.getenv("DEEPSEEK_APIKEY")

llm_settings = {
    "model": "deepseek-chat",
    "openai_api_key": deepseek_key,
    "openai_api_base": "https://api.deepseek.com/v1",
}
graph_config = {"llm": llm_settings, "verbose": True}

# ************************************************
# Build the XMLScraperMultiGraph and execute it
# ************************************************

# The sources are the file *contents* (strings), not file objects.
xml_sources = [xml_text, xml_text]

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=xml_sources,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Execution info reported by the graph
# ************************************************

exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
57
examples/gemini/csv_scraper_graph_multi_gemini.py
Normal file
57
examples/gemini/csv_scraper_graph_multi_gemini.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
Basic example of a scraping pipeline using CSVScraperMultiGraph on CSV documents.

The CSV file is loaded with pandas, rendered to text and passed twice as the
list of sources. The script prints the answer and the execution info, then
saves the answer to csv and json.
"""

import os

import pandas as pd
from dotenv import load_dotenv

from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# Render the complete table. str(DataFrame) obeys pandas' display options and
# abbreviates long tables / wide cells with "...", which would silently drop
# data from the source given to the graph; to_string() renders every row and
# column. For a small table the rendered text is expected to be the same.
csv_text = text.to_string()

# ************************************************
# Define the configuration for the graph
# ************************************************

gemini_key = os.getenv("GOOGLE_APIKEY")

graph_config = {
    "llm": {
        "api_key": gemini_key,
        "model": "gemini-pro",
    },
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
38
examples/gemini/json_scraper_multi_gemini.py
Normal file
38
examples/gemini/json_scraper_multi_gemini.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""
Example showing how JSONScraperMultiGraph works with a Gemini model.

A JSON file is read as text and supplied twice as the list of sources; the
result of the run is printed as indented JSON.
"""

import json
import os

from dotenv import load_dotenv

from scrapegraphai.graphs import JSONScraperMultiGraph

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

gemini_key = os.getenv("GOOGLE_APIKEY")

llm_settings = {
    "api_key": gemini_key,
    "model": "gemini-pro",
}
graph_config = {"llm": llm_settings, "library": "beautifulsoup"}

# Input document, resolved relative to this script's directory.
FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))
json_path = os.path.join(script_dir, FILE_NAME)

with open(json_path, "r", encoding="utf-8") as json_file:
    json_text = json_file.read()

# The same document is used for both sources.
sources = [json_text, json_text]

multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
74
examples/gemini/pdf_scraper_multi_gemini.py
Normal file
74
examples/gemini/pdf_scraper_multi_gemini.py
Normal file
@ -0,0 +1,74 @@
|
||||
"""
Example showing how PdfScraperMultiGraph works with a Gemini model.

Four copies of one abstract are used as the sources; the prompt asks for the
independent variable, dependent variable and exogenous shock of each abstract.
The result of the run is printed as indented JSON.
"""

import json
import os

from dotenv import load_dotenv

from scrapegraphai.graphs import PdfScraperMultiGraph

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

gemini_key = os.getenv("GOOGLE_APIKEY")

llm_settings = {
    "api_key": gemini_key,
    "model": "gemini-pro",
}
graph_config = {"llm": llm_settings, "library": "beautifulsoup"}

# ***********************************************
# Sources: the same abstract, listed four times
# ***********************************************

abstract = "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking."

sources = [abstract] * 4

# Instructions plus two worked query/response examples for the model.
prompt = """
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:

Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
Response Format: For each abstract, present your response in the following structured format:

Independent Variable (IV):
Dependent Variable (DV):
Exogenous Shock:

Example Queries and Responses:

Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.

Response:

Independent Variable (IV): Employee happiness.
Dependent Variable (DV): Overall firm productivity.
Exogenous Shock: Sudden company-wide increase in bonus payments.

Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.

Response:

Independent Variable (IV): Exposure to social media.
Dependent Variable (DV): Mental health outcomes.
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
"""

# *******************************************************
# Create the PdfScraperMultiGraph instance and run it
# *******************************************************

multiple_search_graph = PdfScraperMultiGraph(
    prompt=prompt,
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
57
examples/gemini/xml_scraper_graph_multi_gemini.py
Normal file
57
examples/gemini/xml_scraper_graph_multi_gemini.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
Example pipeline: query XML documents with XMLScraperMultiGraph (Gemini model).

One XML file is read from disk and handed to the graph twice as a list of
sources. The script prints the answer and the execution info, then saves the
answer to csv and json.
"""

import os

from dotenv import load_dotenv

from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

# ************************************************
# Input: XML document stored next to this script
# ************************************************

FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
xml_path = os.path.join(script_dir, FILE_NAME)

with open(xml_path, "r", encoding="utf-8") as xml_file:
    xml_text = xml_file.read()

# ************************************************
# Graph configuration
# ************************************************

gemini_key = os.getenv("GOOGLE_APIKEY")

llm_settings = {
    "api_key": gemini_key,
    "model": "gemini-pro",
}
graph_config = {"llm": llm_settings}

# ************************************************
# Build the XMLScraperMultiGraph and execute it
# ************************************************

# The sources are the file *contents* (strings), not file objects.
xml_sources = [xml_text, xml_text]

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=xml_sources,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Execution info reported by the graph
# ************************************************

exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
59
examples/groq/csv_scraper_graph_multi_groq.py
Normal file
59
examples/groq/csv_scraper_graph_multi_groq.py
Normal file
@ -0,0 +1,59 @@
|
||||
"""
Basic example of a scraping pipeline using CSVScraperMultiGraph on CSV documents.

The CSV file is loaded with pandas, rendered to text and passed twice as the
list of sources. The script prints the answer and the execution info, then
saves the answer to csv and json.
"""

import os

import pandas as pd
from dotenv import load_dotenv

from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# Render the complete table. str(DataFrame) obeys pandas' display options and
# abbreviates long tables / wide cells with "...", which would silently drop
# data from the source given to the graph; to_string() renders every row and
# column. For a small table the rendered text is expected to be the same.
csv_text = text.to_string()

# ************************************************
# Define the configuration for the graph
# ************************************************

groq_key = os.getenv("GROQ_APIKEY")

graph_config = {
    "llm": {
        "model": "groq/gemma-7b-it",
        "api_key": groq_key,
        "temperature": 0,
    },
    "headless": False,
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    source=[csv_text, csv_text],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
38
examples/groq/json_scraper_multi_groq.py
Normal file
38
examples/groq/json_scraper_multi_groq.py
Normal file
@ -0,0 +1,38 @@
|
||||
"""
Example showing how JSONScraperMultiGraph works with a Groq-hosted model.

A JSON file is read as text and supplied twice as the list of sources; the
result of the run is printed as indented JSON.
"""

import json
import os

from dotenv import load_dotenv

from scrapegraphai.graphs import JSONScraperMultiGraph

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

groq_key = os.getenv("GROQ_APIKEY")

llm_settings = {
    "model": "groq/gemma-7b-it",
    "api_key": groq_key,
    "temperature": 0,
}
graph_config = {"llm": llm_settings, "library": "beautifulsoup"}

# Input document, resolved relative to this script's directory.
FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))
json_path = os.path.join(script_dir, FILE_NAME)

with open(json_path, "r", encoding="utf-8") as json_file:
    json_text = json_file.read()

# The same document is used for both sources.
sources = [json_text, json_text]

multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
74
examples/groq/pdf_scraper_multi_groq.py
Normal file
74
examples/groq/pdf_scraper_multi_groq.py
Normal file
@ -0,0 +1,74 @@
|
||||
"""
Example showing how PdfScraperMultiGraph works with a Groq-hosted model.

Four copies of one abstract are used as the sources; the prompt asks for the
independent variable, dependent variable and exogenous shock of each abstract.
The result of the run is printed as indented JSON.
"""

import json
import os

from dotenv import load_dotenv

from scrapegraphai.graphs import PdfScraperMultiGraph

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

groq_key = os.getenv("GROQ_APIKEY")

llm_settings = {
    "model": "groq/gemma-7b-it",
    "api_key": groq_key,
    "temperature": 0,
}
graph_config = {"llm": llm_settings, "library": "beautifulsoup"}

# ***********************************************
# Sources: the same abstract, listed four times
# ***********************************************

abstract = "This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking."

sources = [abstract] * 4

# Instructions plus two worked query/response examples for the model.
prompt = """
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:

Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
Response Format: For each abstract, present your response in the following structured format:

Independent Variable (IV):
Dependent Variable (DV):
Exogenous Shock:

Example Queries and Responses:

Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.

Response:

Independent Variable (IV): Employee happiness.
Dependent Variable (DV): Overall firm productivity.
Exogenous Shock: Sudden company-wide increase in bonus payments.

Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.

Response:

Independent Variable (IV): Exposure to social media.
Dependent Variable (DV): Mental health outcomes.
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
"""

# *******************************************************
# Create the PdfScraperMultiGraph instance and run it
# *******************************************************

multiple_search_graph = PdfScraperMultiGraph(
    prompt=prompt,
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
60
examples/groq/xml_scraper_graph_multi_groq.py
Normal file
60
examples/groq/xml_scraper_graph_multi_groq.py
Normal file
@ -0,0 +1,60 @@
|
||||
"""
Example pipeline: query XML documents with XMLScraperMultiGraph (Groq-hosted model).

One XML file is read from disk and handed to the graph twice as a list of
sources. The script prints the answer and the execution info, then saves the
answer to csv and json.
"""

import os

from dotenv import load_dotenv

from scrapegraphai.graphs import XMLScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

# Load variables (such as the API key read below) from a .env file.
load_dotenv()

# ************************************************
# Input: XML document stored next to this script
# ************************************************

FILE_NAME = "inputs/books.xml"
script_dir = os.path.dirname(os.path.realpath(__file__))
xml_path = os.path.join(script_dir, FILE_NAME)

with open(xml_path, "r", encoding="utf-8") as xml_file:
    xml_text = xml_file.read()

# ************************************************
# Graph configuration
# ************************************************

groq_key = os.getenv("GROQ_APIKEY")

llm_settings = {
    "model": "groq/gemma-7b-it",
    "api_key": groq_key,
    "temperature": 0,
}
graph_config = {"llm": llm_settings, "headless": False}

# ************************************************
# Build the XMLScraperMultiGraph and execute it
# ************************************************

# The sources are the file *contents* (strings), not file objects.
xml_sources = [xml_text, xml_text]

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=xml_sources,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Execution info reported by the graph
# ************************************************

exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
46
examples/huggingfacehub/json_scraper_multi_huggingfacehub.py
Normal file
46
examples/huggingfacehub/json_scraper_multi_huggingfacehub.py
Normal file
@ -0,0 +1,46 @@
|
||||
"""
Example showing how JSONScraperMultiGraph works with Hugging Face Hub models.

The LLM and the embedding model are created up front and passed to the graph
as ready-made instances. A JSON file is read as text and supplied twice as the
list of sources; the result of the run is printed as indented JSON.
"""

import json
import os

from dotenv import load_dotenv
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import HuggingFaceEndpoint

from scrapegraphai.graphs import JSONScraperMultiGraph

# Load variables (such as the API token read below) from a .env file.
load_dotenv()

HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

# Text-generation model used by the graph.
llm_model_instance = HuggingFaceEndpoint(
    repo_id=repo_id,
    max_length=128,
    temperature=0.5,
    token=HUGGINGFACEHUB_API_TOKEN,
)

# Embedding model used by the graph.
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
    api_key=HUGGINGFACEHUB_API_TOKEN,
    model_name="sentence-transformers/all-MiniLM-l6-v2",
)

graph_config = {
    "llm": {"model_instance": llm_model_instance},
    "embeddings": {"model_instance": embedder_model_instance},
}

# Input document, resolved relative to this script's directory.
FILE_NAME = "inputs/example.json"
script_dir = os.path.dirname(os.path.realpath(__file__))
json_path = os.path.join(script_dir, FILE_NAME)

with open(json_path, "r", encoding="utf-8") as json_file:
    json_text = json_file.read()

# The same document is used for both sources.
sources = [json_text, json_text]

multiple_search_graph = JSONScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

result = multiple_search_graph.run()
print(json.dumps(result, indent=4))
|
||||
79
examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py
Normal file
79
examples/huggingfacehub/pdf_scraper_multi_huggingfacehub.py
Normal file
@ -0,0 +1,79 @@
|
||||
"""
|
||||
Module for showing how PDFScraper multi works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import PdfScraperMultiGraph
|
||||
from langchain_community.llms import HuggingFaceEndpoint
|
||||
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
|
||||
load_dotenv()
|
||||
|
||||
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
|
||||
|
||||
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
|
||||
|
||||
llm_model_instance = HuggingFaceEndpoint(
|
||||
repo_id=repo_id, max_length=128, temperature=0.5, token=HUGGINGFACEHUB_API_TOKEN
|
||||
)
|
||||
|
||||
embedder_model_instance = HuggingFaceInferenceAPIEmbeddings(
|
||||
api_key=HUGGINGFACEHUB_API_TOKEN, model_name="sentence-transformers/all-MiniLM-l6-v2"
|
||||
)
|
||||
|
||||
graph_config = {
|
||||
"llm": {"model_instance": llm_model_instance},
|
||||
"embeddings": {"model_instance": embedder_model_instance}
|
||||
}
|
||||
|
||||
# List of sources to scrape
|
||||
sources = [
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
]
|
||||
|
||||
prompt = """
|
||||
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:
|
||||
|
||||
Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
|
||||
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
|
||||
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
|
||||
Response Format: For each abstract, present your response in the following structured format:
|
||||
|
||||
Independent Variable (IV):
|
||||
Dependent Variable (DV):
|
||||
Exogenous Shock:
|
||||
|
||||
Example Queries and Responses:
|
||||
|
||||
Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Employee happiness.
|
||||
Dependent Variable (DV): Overall firm productivity.
|
||||
Exogenous Shock: Sudden company-wide increase in bonus payments.
|
||||
|
||||
Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Exposure to social media.
|
||||
Dependent Variable (DV): Mental health outcomes.
|
||||
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
|
||||
"""
|
||||
# *******************************************************
|
||||
# Create the PdfScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = PdfScraperMultiGraph(
|
||||
prompt=prompt,
|
||||
source= sources,
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
62
examples/local_models/csv_scraper_graph_multi_ollama.py
Normal file
62
examples/local_models/csv_scraper_graph_multi_ollama.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# ************************************************
|
||||
# Read the CSV file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/username.csv"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
text = pd.read_csv(file_path)
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/llama3",
|
||||
"temperature": 0,
|
||||
"format": "json", # Ollama needs the format to be specified explicitly
|
||||
# "model_tokens": 2000, # set context length arbitrarily
|
||||
"base_url": "http://localhost:11434",
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434",
|
||||
},
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the CSVScraperMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
csv_scraper_graph = CSVScraperMultiGraph(
|
||||
prompt="List me all the last names",
|
||||
source=[str(text), str(text)],
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = csv_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = csv_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
39
examples/local_models/json_scraper_multi_ollama.py
Normal file
39
examples/local_models/json_scraper_multi_ollama.py
Normal file
@ -0,0 +1,39 @@
|
||||
"""
|
||||
Module for showing how JSONScraperMultiGraph works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/llama3",
|
||||
"temperature": 0,
|
||||
"format": "json", # Ollama needs the format to be specified explicitly
|
||||
"model_tokens": 4000,
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
}
|
||||
FILE_NAME = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
sources = [text, text]
|
||||
|
||||
multiple_search_graph = JSONScraperMultiGraph(
|
||||
prompt= "List me all the authors, title and genres of the books",
|
||||
source= sources,
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
71
examples/local_models/pdf_scraper_multi_ollama.py
Normal file
71
examples/local_models/pdf_scraper_multi_ollama.py
Normal file
@ -0,0 +1,71 @@
|
||||
"""
|
||||
Module for showing how PDFScraper multi works
|
||||
"""
|
||||
import json
|
||||
from scrapegraphai.graphs import PdfScraperMultiGraph
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/llama3",
|
||||
"temperature": 0,
|
||||
"format": "json", # Ollama needs the format to be specified explicitly
|
||||
"model_tokens": 4000,
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
},
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# List of sources to scrape
|
||||
sources = [
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
]
|
||||
|
||||
prompt = """
|
||||
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:
|
||||
|
||||
Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
|
||||
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
|
||||
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
|
||||
Response Format: For each abstract, present your response in the following structured format:
|
||||
|
||||
Independent Variable (IV):
|
||||
Dependent Variable (DV):
|
||||
Exogenous Shock:
|
||||
|
||||
Example Queries and Responses:
|
||||
|
||||
Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Employee happiness.
|
||||
Dependent Variable (DV): Overall firm productivity.
|
||||
Exogenous Shock: Sudden company-wide increase in bonus payments.
|
||||
|
||||
Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Exposure to social media.
|
||||
Dependent Variable (DV): Mental health outcomes.
|
||||
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
|
||||
"""
|
||||
# *******************************************************
|
||||
# Create the PdfScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = PdfScraperMultiGraph(
|
||||
prompt=prompt,
|
||||
source= sources,
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
62
examples/local_models/xml_scraper_graph_multi_ollama.py
Normal file
62
examples/local_models/xml_scraper_graph_multi_ollama.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
# ************************************************
|
||||
# Read the XML file
|
||||
# ************************************************
|
||||
|
||||
FILE_NAME = "inputs/books.xml"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"model": "ollama/llama3",
|
||||
"temperature": 0,
|
||||
"format": "json", # Ollama needs the format to be specified explicitly
|
||||
# "model_tokens": 2000, # set context length arbitrarily
|
||||
"base_url": "http://localhost:11434",
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434",
|
||||
},
|
||||
"verbose": True,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Create the XMLScraperMultiGraph instance and run it
|
||||
# ************************************************
|
||||
|
||||
xml_scraper_graph = XMLScraperMultiGraph(
|
||||
prompt="List me all the authors, title and genres of the books",
|
||||
source=[text, text], # Pass the content of the file, not the file object
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = xml_scraper_graph.run()
|
||||
print(result)
|
||||
|
||||
# ************************************************
|
||||
# Get graph execution info
|
||||
# ************************************************
|
||||
|
||||
graph_exec_info = xml_scraper_graph.get_execution_info()
|
||||
print(prettify_exec_info(graph_exec_info))
|
||||
|
||||
# Save to json or csv
|
||||
convert_to_csv(result, "result")
|
||||
convert_to_json(result, "result")
|
||||
0
examples/oneapi/csv_scraper_graph_multi_oneapi.py
Normal file
0
examples/oneapi/csv_scraper_graph_multi_oneapi.py
Normal file
32
examples/oneapi/json_scraper_multi_oneapi..py
Normal file
32
examples/oneapi/json_scraper_multi_oneapi..py
Normal file
@ -0,0 +1,32 @@
|
||||
"""
|
||||
Module for showing how JSONScraperMultiGraph works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": "***************************",
|
||||
"model": "oneapi/qwen-turbo",
|
||||
"base_url": "http://127.0.0.1:3000/v1", # 设置 OneAPI URL
|
||||
}
|
||||
}
|
||||
FILE_NAME = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, FILE_NAME)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
sources = [text, text]
|
||||
|
||||
multiple_search_graph = JSONScraperMultiGraph(
|
||||
prompt= "List me all the authors, title and genres of the books",
|
||||
source= sources,
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
@ -3,10 +3,8 @@ Basic example of scraping pipeline using JSONScraperGraph from JSON documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Read the JSON file
|
||||
@ -23,8 +21,6 @@ with open(file_path, 'r', encoding="utf-8") as file:
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": "***************************",
|
||||
|
||||
70
examples/oneapi/pdf_scraper_multi_oneapi.py
Normal file
70
examples/oneapi/pdf_scraper_multi_oneapi.py
Normal file
@ -0,0 +1,70 @@
|
||||
"""
|
||||
Module for showing how PDFScraper multi works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import PdfScraperMultiGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-3.5-turbo",
|
||||
},
|
||||
}
|
||||
|
||||
# List of sources to scrape
|
||||
sources = [
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
]
|
||||
|
||||
prompt = """
|
||||
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:
|
||||
|
||||
Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
|
||||
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
|
||||
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
|
||||
Response Format: For each abstract, present your response in the following structured format:
|
||||
|
||||
Independent Variable (IV):
|
||||
Dependent Variable (DV):
|
||||
Exogenous Shock:
|
||||
|
||||
Example Queries and Responses:
|
||||
|
||||
Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Employee happiness.
|
||||
Dependent Variable (DV): Overall firm productivity.
|
||||
Exogenous Shock: Sudden company-wide increase in bonus payments.
|
||||
|
||||
Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Exposure to social media.
|
||||
Dependent Variable (DV): Mental health outcomes.
|
||||
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
|
||||
"""
|
||||
# *******************************************************
|
||||
# Create the PdfScraperMultiGraph instance and run it
|
||||
# *******************************************************
|
||||
|
||||
multiple_search_graph = PdfScraperMultiGraph(
|
||||
prompt=prompt,
|
||||
source= sources,
|
||||
schema=None,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = multiple_search_graph.run()
|
||||
print(json.dumps(result, indent=4))
|
||||
57
examples/oneapi/xml_scraper_graph_multi_oneapi.py
Normal file
57
examples/oneapi/xml_scraper_graph_multi_oneapi.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
# Load the XML document that will be scraped
# ************************************************

FILE_NAME = "inputs/books.xml"
file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), FILE_NAME)

with open(file_path, mode="r", encoding="utf-8") as xml_file:
    text = xml_file.read()

# ************************************************
# Build the graph configuration
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")
llm_settings = {"api_key": openai_key, "model": "gpt-3.5-turbo"}
graph_config = {"llm": llm_settings}

# ************************************************
# Instantiate XMLScraperMultiGraph and run it
# ************************************************

# Each entry is the text of the file, not a file object; the same
# document is deliberately passed twice.
xml_sources = [text, text]
xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=xml_sources,
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Print the graph execution info
# ************************************************

print(prettify_exec_info(xml_scraper_graph.get_execution_info()))

# Save the result as both CSV and JSON under the base name "result"
for save_result in (convert_to_csv, convert_to_json):
    save_result(result, "result")
|
||||
@ -23,7 +23,7 @@ with open(file_path, 'r', encoding="utf-8") as file:
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
openai_key = os.getenv("ONEAPI_KEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
|
||||
56
examples/openai/csv_scraper_graph_multi_openai.py
Normal file
56
examples/openai/csv_scraper_graph_multi_openai.py
Normal file
@ -0,0 +1,56 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import pandas as pd
|
||||
from scrapegraphai.graphs import CSVScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************

# The API key is read from the environment (loaded by load_dotenv() above)
# instead of a hard-coded placeholder, and the model is an OpenAI one, so
# this example matches the other scripts in examples/openai.
openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-3.5-turbo",
    },
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
    prompt="List me all the last names",
    # The DataFrame is passed as its string rendering; the same document
    # is deliberately passed twice.
    source=[str(text), str(text)],
    config=graph_config,
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
@ -9,7 +9,6 @@ from scrapegraphai.utils import prettify_exec_info
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
37
examples/openai/json_scraper_multi_openai.py
Normal file
37
examples/openai/json_scraper_multi_openai.py
Normal file
@ -0,0 +1,37 @@
|
||||
"""
|
||||
Module for showing how JSONScraperMultiGraph works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import JSONScraperMultiGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
# Build the graph configuration
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")
graph_config = {"llm": {"api_key": openai_key, "model": "gpt-3.5-turbo"}}

# ************************************************
# Load the JSON document that will be scraped
# ************************************************

FILE_NAME = "inputs/example.json"
file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), FILE_NAME)

with open(file_path, mode="r", encoding="utf-8") as json_file:
    text = json_file.read()

# The same document is deliberately used twice as input
sources = [text] * 2

# ************************************************
# Instantiate JSONScraperMultiGraph and run it
# ************************************************

json_multi_graph = JSONScraperMultiGraph(
    prompt= "List me all the authors, title and genres of the books",
    source=sources,
    schema=None,
    config=graph_config,
)

result = json_multi_graph.run()
print(json.dumps(result, indent=4))
|
||||
70
examples/openai/pdf_scraper_multi_openai.py
Normal file
70
examples/openai/pdf_scraper_multi_openai.py
Normal file
@ -0,0 +1,70 @@
|
||||
"""
|
||||
Module for showing how PDFScraper multi works
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import PdfScraperMultiGraph
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# OpenAI credentials come from the environment variable OPENAI_APIKEY
openai_key = os.environ.get("OPENAI_APIKEY")

# Only the "llm" section is configured for this example
llm_settings = {"api_key": openai_key, "model": "gpt-3.5-turbo"}
graph_config = {"llm": llm_settings}
|
||||
|
||||
# Convert to list
|
||||
sources = [
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
"This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather—the interaction between call center architecture and outdoor weather conditions—in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity – largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.",
|
||||
]
|
||||
|
||||
prompt = """
|
||||
You are an expert in reviewing academic manuscripts. Please analyze the abstracts provided from an academic journal article to extract and clearly identify the following elements:
|
||||
|
||||
Independent Variable (IV): The variable that is manipulated or considered as the primary cause affecting other variables.
|
||||
Dependent Variable (DV): The variable that is measured or observed, which is expected to change as a result of variations in the Independent Variable.
|
||||
Exogenous Shock: Identify any external or unexpected events used in the study that serve as a natural experiment or provide a unique setting for observing the effects on the IV and DV.
|
||||
Response Format: For each abstract, present your response in the following structured format:
|
||||
|
||||
Independent Variable (IV):
|
||||
Dependent Variable (DV):
|
||||
Exogenous Shock:
|
||||
|
||||
Example Queries and Responses:
|
||||
|
||||
Query: This paper provides evidence from a natural experiment on the relationship between positive affect and productivity. We link highly detailed administrative data on the behaviors and performance of all telesales workers at a large telecommunications company with survey reports of employee happiness that we collected on a weekly basis. We use variation in worker mood arising from visual exposure to weather the interaction between call center architecture and outdoor weather conditions in order to provide a quasi-experimental test of the effect of happiness on productivity. We find evidence of a positive impact on sales performance, which is driven by changes in labor productivity largely through workers converting more calls into sales, and to a lesser extent by making more calls per hour and adhering more closely to their schedule. We find no evidence in our setting of effects on measures of high-frequency labor supply such as attendance and break-taking.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Employee happiness.
|
||||
Dependent Variable (DV): Overall firm productivity.
|
||||
Exogenous Shock: Sudden company-wide increase in bonus payments.
|
||||
|
||||
Query: The diffusion of social media coincided with a worsening of mental health conditions among adolescents and young adults in the United States, giving rise to speculation that social media might be detrimental to mental health. In this paper, we provide quasi-experimental estimates of the impact of social media on mental health by leveraging a unique natural experiment: the staggered introduction of Facebook across U.S. colleges. Our analysis couples data on student mental health around the years of Facebook's expansion with a generalized difference-in-differences empirical strategy. We find that the roll-out of Facebook at a college increased symptoms of poor mental health, especially depression. We also find that, among students predicted to be most susceptible to mental illness, the introduction of Facebook led to increased utilization of mental healthcare services. Lastly, we find that, after the introduction of Facebook, students were more likely to report experiencing impairments to academic performance resulting from poor mental health. Additional evidence on mechanisms suggests that the results are due to Facebook fostering unfavorable social comparisons.
|
||||
|
||||
Response:
|
||||
|
||||
Independent Variable (IV): Exposure to social media.
|
||||
Dependent Variable (DV): Mental health outcomes.
|
||||
Exogenous Shock: staggered introduction of Facebook across U.S. colleges.
|
||||
"""
|
||||
# *******************************************************
# Create the PdfScraperMultiGraph instance and run it
# *******************************************************

graph_arguments = {
    "prompt": prompt,
    "source": sources,
    "schema": None,
    "config": graph_config,
}
pdf_multi_graph = PdfScraperMultiGraph(**graph_arguments)

# Run the pipeline and pretty-print the answer as JSON
result = pdf_multi_graph.run()
print(json.dumps(result, indent=4))
|
||||
@ -2,7 +2,8 @@
|
||||
Basic example of scraping pipeline using SmartScraper
|
||||
"""
|
||||
|
||||
import os, json
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import SmartScraperMultiGraph
|
||||
|
||||
|
||||
57
examples/openai/xml_scraper_graph_multi_ollama.py
Normal file
57
examples/openai/xml_scraper_graph_multi_ollama.py
Normal file
@ -0,0 +1,57 @@
|
||||
"""
|
||||
Basic example of scraping pipeline using XMLScraperMultiGraph from XML documents
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from scrapegraphai.graphs import XMLScraperMultiGraph
|
||||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
# Read the XML file
# ************************************************

FILE_NAME = "inputs/books.xml"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

with open(file_path, 'r', encoding="utf-8") as file:
    text = file.read()

# ************************************************
# Define the configuration for the graph
# ************************************************

# The API key is read from the environment (loaded by load_dotenv() above)
# instead of a hard-coded placeholder, and the model is an OpenAI one, so
# this example matches the other scripts in examples/openai.
# NOTE(review): the file name says "ollama" but the script lives in
# examples/openai - confirm the intended provider.
openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-3.5-turbo",
    },
}

# ************************************************
# Create the XMLScraperMultiGraph instance and run it
# ************************************************

xml_scraper_graph = XMLScraperMultiGraph(
    prompt="List me all the authors, title and genres of the books",
    source=[text, text],  # Pass the content of the file, not the file object
    config=graph_config,
)

result = xml_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
|
||||
@ -16,3 +16,7 @@ from .pdf_scraper_graph import PDFScraperGraph
|
||||
from .omni_scraper_graph import OmniScraperGraph
|
||||
from .omni_search_graph import OmniSearchGraph
|
||||
from .smart_scraper_multi_graph import SmartScraperMultiGraph
|
||||
from .pdf_scraper_multi import PdfScraperMultiGraph
|
||||
from .json_scraper_multi import JSONScraperMultiGraph
|
||||
from .csv_scraper_graph_multi import CSVScraperMultiGraph
|
||||
from .xml_scraper_graph_multi import XMLScraperMultiGraph
|
||||
|
||||
116
scrapegraphai/graphs/csv_scraper_graph_multi.py
Normal file
116
scrapegraphai/graphs/csv_scraper_graph_multi.py
Normal file
@ -0,0 +1,116 @@
|
||||
"""
|
||||
CSVScraperMultiGraph Module
|
||||
"""
|
||||
|
||||
from copy import copy, deepcopy
|
||||
from typing import List, Optional
|
||||
|
||||
from .base_graph import BaseGraph
|
||||
from .abstract_graph import AbstractGraph
|
||||
from .csv_scraper_graph import CSVScraperGraph
|
||||
|
||||
from ..nodes import (
|
||||
GraphIteratorNode,
|
||||
MergeAnswersNode
|
||||
)
|
||||
|
||||
|
||||
class CSVScraperMultiGraph(AbstractGraph):
    """
    Pipeline that answers a single prompt over several CSV sources.

    One CSVScraperGraph is built as a template and handed to a
    GraphIteratorNode together with the list of sources; the node's
    "results" output is then fed to a MergeAnswersNode, which produces
    the final "answer".

    Attributes:
        max_results (int): Value of ``config["max_results"]`` (default 3).
            It is stored but not read anywhere else in this class.
        copy_config (dict): Private copy of ``config`` that is passed to the
            inner CSVScraperGraph.

    Other attributes used here (``prompt``, ``source``, ``schema``,
    ``llm_model``, ``graph``) are expected to be set by AbstractGraph,
    which is not shown in this module.

    Args:
        prompt (str): The question to answer from the sources.
        source (List[str]): The CSV sources to process.
        config (dict): Configuration parameters for the graph.
        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> csv_multi_graph = CSVScraperMultiGraph(
        ...     prompt="List me all the last names",
        ...     source=[csv_text_1, csv_text_2],
        ...     config={"llm": {"model": "gpt-3.5-turbo"}},
        ... )
        >>> result = csv_multi_graph.run()
    """

    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
        self.max_results = config.get("max_results", 3)

        # A config whose values are all strings is copied shallowly;
        # anything else (e.g. nested dicts) is deep-copied.
        only_strings = all(isinstance(item, str) for item in config.values())
        self.copy_config = copy(config) if only_strings else deepcopy(config)

        # copy_config is assigned before the base constructor runs
        # (presumably because it calls _create_graph - confirm in AbstractGraph).
        super().__init__(prompt, config, source, schema)

    def _create_graph(self) -> BaseGraph:
        """
        Builds the two-node workflow: iterate over the sources, then merge.

        Returns:
            BaseGraph: Graph whose entry point is the GraphIteratorNode,
            with a single edge to the MergeAnswersNode.
        """

        # Template graph; prompt and source are left empty here
        csv_graph_template = CSVScraperGraph(prompt="", source="", config=self.copy_config)

        # NOTE(review): the state key is "jsons" although the sources are CSV;
        # it matches the key that run() puts into the initial state.
        iterate_sources = GraphIteratorNode(
            input="user_prompt & jsons",
            output=["results"],
            node_config={"graph_instance": csv_graph_template},
        )

        merge_results = MergeAnswersNode(
            input="user_prompt & results",
            output=["answer"],
            node_config={"llm_model": self.llm_model, "schema": self.schema},
        )

        pipeline_nodes = [iterate_sources, merge_results]
        pipeline_edges = [(iterate_sources, merge_results)]
        return BaseGraph(
            nodes=pipeline_nodes,
            edges=pipeline_edges,
            entry_point=iterate_sources,
        )

    def run(self) -> str:
        """
        Executes the graph on the stored prompt and sources.

        Stores the outcome of the execution in ``final_state`` and
        ``execution_info``.

        Returns:
            str: The "answer" entry of the final state, or
            "No answer found." when that entry is missing.
        """
        initial_state = {"user_prompt": self.prompt, "jsons": self.source}
        final_state, execution_info = self.graph.execute(initial_state)
        self.final_state = final_state
        self.execution_info = execution_info

        return self.final_state.get("answer", "No answer found.")
|
||||
116
scrapegraphai/graphs/json_scraper_multi.py
Normal file
116
scrapegraphai/graphs/json_scraper_multi.py
Normal file
@ -0,0 +1,116 @@
|
||||
"""
|
||||
JSONScraperMultiGraph Module
|
||||
"""
|
||||
|
||||
from copy import copy, deepcopy
|
||||
from typing import List, Optional
|
||||
|
||||
from .base_graph import BaseGraph
|
||||
from .abstract_graph import AbstractGraph
|
||||
from .json_scraper_graph import JSONScraperGraph
|
||||
|
||||
from ..nodes import (
|
||||
GraphIteratorNode,
|
||||
MergeAnswersNode
|
||||
)
|
||||
|
||||
|
||||
class JSONScraperMultiGraph(AbstractGraph):
    """
    Pipeline that answers a single prompt over several JSON sources.

    One JSONScraperGraph is built as a template and handed to a
    GraphIteratorNode together with the list of sources; the node's
    "results" output is then fed to a MergeAnswersNode, which produces
    the final "answer".

    Attributes:
        max_results (int): Value of ``config["max_results"]`` (default 3).
            It is stored but not read anywhere else in this class.
        copy_config (dict): Private copy of ``config`` that is passed to the
            inner JSONScraperGraph.

    Other attributes used here (``prompt``, ``source``, ``schema``,
    ``llm_model``, ``graph``) are expected to be set by AbstractGraph,
    which is not shown in this module.

    Args:
        prompt (str): The question to answer from the sources.
        source (List[str]): The JSON sources to process.
        config (dict): Configuration parameters for the graph.
        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> json_multi_graph = JSONScraperMultiGraph(
        ...     prompt="List me all the authors, title and genres of the books",
        ...     source=[json_text_1, json_text_2],
        ...     config={"llm": {"model": "gpt-3.5-turbo"}},
        ... )
        >>> result = json_multi_graph.run()
    """

    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
        self.max_results = config.get("max_results", 3)

        # A config whose values are all strings is copied shallowly;
        # anything else (e.g. nested dicts) is deep-copied.
        only_strings = all(isinstance(item, str) for item in config.values())
        self.copy_config = copy(config) if only_strings else deepcopy(config)

        # copy_config is assigned before the base constructor runs
        # (presumably because it calls _create_graph - confirm in AbstractGraph).
        super().__init__(prompt, config, source, schema)

    def _create_graph(self) -> BaseGraph:
        """
        Builds the two-node workflow: iterate over the sources, then merge.

        Returns:
            BaseGraph: Graph whose entry point is the GraphIteratorNode,
            with a single edge to the MergeAnswersNode.
        """

        # Template graph; prompt and source are left empty here
        json_graph_template = JSONScraperGraph(prompt="", source="", config=self.copy_config)

        # The "jsons" state key matches the key that run() puts into the
        # initial state.
        iterate_sources = GraphIteratorNode(
            input="user_prompt & jsons",
            output=["results"],
            node_config={"graph_instance": json_graph_template},
        )

        merge_results = MergeAnswersNode(
            input="user_prompt & results",
            output=["answer"],
            node_config={"llm_model": self.llm_model, "schema": self.schema},
        )

        pipeline_nodes = [iterate_sources, merge_results]
        pipeline_edges = [(iterate_sources, merge_results)]
        return BaseGraph(
            nodes=pipeline_nodes,
            edges=pipeline_edges,
            entry_point=iterate_sources,
        )

    def run(self) -> str:
        """
        Executes the graph on the stored prompt and sources.

        Stores the outcome of the execution in ``final_state`` and
        ``execution_info``.

        Returns:
            str: The "answer" entry of the final state, or
            "No answer found." when that entry is missing.
        """
        initial_state = {"user_prompt": self.prompt, "jsons": self.source}
        final_state, execution_info = self.graph.execute(initial_state)
        self.final_state = final_state
        self.execution_info = execution_info

        return self.final_state.get("answer", "No answer found.")
|
||||
@ -1,3 +1,4 @@
|
||||
|
||||
"""
|
||||
PDFScraperGraph Module
|
||||
"""
|
||||
@ -63,8 +64,9 @@ class PDFScraperGraph(AbstractGraph):
|
||||
input='pdf | pdf_dir',
|
||||
output=["doc"],
|
||||
)
|
||||
|
||||
rag_node = RAGNode(
|
||||
input="user_prompt & doc",
|
||||
input="user_prompt & (parsed_doc | doc)",
|
||||
output=["relevant_chunks"],
|
||||
node_config={
|
||||
"llm_model": self.llm_model,
|
||||
|
||||
117
scrapegraphai/graphs/pdf_scraper_multi.py
Normal file
117
scrapegraphai/graphs/pdf_scraper_multi.py
Normal file
@ -0,0 +1,117 @@
|
||||
"""
|
||||
PdfScraperMultiGraph Module
|
||||
"""
|
||||
|
||||
from copy import copy, deepcopy
|
||||
from typing import List, Optional
|
||||
|
||||
from .base_graph import BaseGraph
|
||||
from .abstract_graph import AbstractGraph
|
||||
from .pdf_scraper_graph import PDFScraperGraph
|
||||
|
||||
from ..nodes import (
|
||||
GraphIteratorNode,
|
||||
MergeAnswersNode
|
||||
)
|
||||
|
||||
|
||||
class PdfScraperMultiGraph(AbstractGraph):
    """
    Pipeline that answers a single prompt over several PDF sources.

    One PDFScraperGraph is built as a template and handed to a
    GraphIteratorNode together with the list of sources; the node's
    "results" output is then fed to a MergeAnswersNode, which produces
    the final "answer".

    Attributes:
        max_results (int): Value of ``config["max_results"]`` (default 3).
            It is stored but not read anywhere else in this class.
        copy_config (dict): Private copy of ``config`` that is passed to the
            inner PDFScraperGraph.

    Other attributes used here (``prompt``, ``source``, ``schema``,
    ``llm_model``, ``graph``) are expected to be set by AbstractGraph,
    which is not shown in this module.

    Args:
        prompt (str): The question to answer from the sources.
        source (List[str]): The PDF sources to process.
        config (dict): Configuration parameters for the graph.
        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> pdf_multi_graph = PdfScraperMultiGraph(
        ...     prompt="Summarize each document",
        ...     source=[pdf_source_1, pdf_source_2],
        ...     config={"llm": {"model": "gpt-3.5-turbo"}},
        ... )
        >>> result = pdf_multi_graph.run()
    """

    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
        self.max_results = config.get("max_results", 3)

        # A config whose values are all strings is copied shallowly;
        # anything else (e.g. nested dicts) is deep-copied.
        only_strings = all(isinstance(item, str) for item in config.values())
        self.copy_config = copy(config) if only_strings else deepcopy(config)

        # copy_config is assigned before the base constructor runs
        # (presumably because it calls _create_graph - confirm in AbstractGraph).
        super().__init__(prompt, config, source, schema)

    def _create_graph(self) -> BaseGraph:
        """
        Builds the two-node workflow: iterate over the sources, then merge.

        Returns:
            BaseGraph: Graph whose entry point is the GraphIteratorNode,
            with a single edge to the MergeAnswersNode.
        """

        # Template graph; prompt and source are left empty here
        pdf_graph_template = PDFScraperGraph(prompt="", source="", config=self.copy_config)

        # The "pdfs" state key matches the key that run() puts into the
        # initial state.
        iterate_sources = GraphIteratorNode(
            input="user_prompt & pdfs",
            output=["results"],
            node_config={"graph_instance": pdf_graph_template},
        )

        merge_results = MergeAnswersNode(
            input="user_prompt & results",
            output=["answer"],
            node_config={"llm_model": self.llm_model, "schema": self.schema},
        )

        pipeline_nodes = [iterate_sources, merge_results]
        pipeline_edges = [(iterate_sources, merge_results)]
        return BaseGraph(
            nodes=pipeline_nodes,
            edges=pipeline_edges,
            entry_point=iterate_sources,
        )

    def run(self) -> str:
        """
        Executes the graph on the stored prompt and sources.

        Stores the outcome of the execution in ``final_state`` and
        ``execution_info``.

        Returns:
            str: The "answer" entry of the final state, or
            "No answer found." when that entry is missing.
        """
        initial_state = {"user_prompt": self.prompt, "pdfs": self.source}
        final_state, execution_info = self.graph.execute(initial_state)
        self.final_state = final_state
        self.execution_info = execution_info

        return self.final_state.get("answer", "No answer found.")
|
||||
@ -117,4 +117,4 @@ class SmartScraperGraph(AbstractGraph):
|
||||
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
||||
self.final_state, self.execution_info = self.graph.execute(inputs)
|
||||
|
||||
return self.final_state.get("answer", "No answer found.")
|
||||
return self.final_state.get("answer", "No answer found.")
|
||||
|
||||
117
scrapegraphai/graphs/xml_scraper_graph_multi.py
Normal file
117
scrapegraphai/graphs/xml_scraper_graph_multi.py
Normal file
@ -0,0 +1,117 @@
|
||||
"""
|
||||
XMLScraperMultiGraph Module
|
||||
"""
|
||||
|
||||
from copy import copy, deepcopy
|
||||
from typing import List, Optional
|
||||
|
||||
from .base_graph import BaseGraph
|
||||
from .abstract_graph import AbstractGraph
|
||||
from .xml_scraper_graph import XMLScraperGraph
|
||||
|
||||
from ..nodes import (
|
||||
GraphIteratorNode,
|
||||
MergeAnswersNode
|
||||
)
|
||||
|
||||
|
||||
class XMLScraperMultiGraph(AbstractGraph):
    """
    Pipeline that answers a single prompt over several XML sources.

    One XMLScraperGraph is built as a template and handed to a
    GraphIteratorNode together with the list of sources; the node's
    "results" output is then fed to a MergeAnswersNode, which produces
    the final "answer".

    Attributes:
        max_results (int): Value of ``config["max_results"]`` (default 3).
            It is stored but not read anywhere else in this class.
        copy_config (dict): Private copy of ``config`` that is passed to the
            inner XMLScraperGraph.

    Other attributes used here (``prompt``, ``source``, ``schema``,
    ``llm_model``, ``graph``) are expected to be set by AbstractGraph,
    which is not shown in this module.

    Args:
        prompt (str): The question to answer from the sources.
        source (List[str]): The XML sources to process.
        config (dict): Configuration parameters for the graph.
        schema (Optional[str]): The schema for the graph output.

    Example:
        >>> xml_multi_graph = XMLScraperMultiGraph(
        ...     prompt="List me all the authors, title and genres of the books",
        ...     source=[xml_text_1, xml_text_2],
        ...     config={"llm": {"model": "gpt-3.5-turbo"}},
        ... )
        >>> result = xml_multi_graph.run()
    """

    def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
        self.max_results = config.get("max_results", 3)

        # A config whose values are all strings is copied shallowly;
        # anything else (e.g. nested dicts) is deep-copied.
        only_strings = all(isinstance(item, str) for item in config.values())
        self.copy_config = copy(config) if only_strings else deepcopy(config)

        # copy_config is assigned before the base constructor runs
        # (presumably because it calls _create_graph - confirm in AbstractGraph).
        super().__init__(prompt, config, source, schema)

    def _create_graph(self) -> BaseGraph:
        """
        Builds the two-node workflow: iterate over the sources, then merge.

        Returns:
            BaseGraph: Graph whose entry point is the GraphIteratorNode,
            with a single edge to the MergeAnswersNode.
        """

        # Template graph; prompt and source are left empty here
        xml_graph_template = XMLScraperGraph(prompt="", source="", config=self.copy_config)

        # NOTE(review): the state key is "jsons" although the sources are XML;
        # it matches the key that run() puts into the initial state.
        iterate_sources = GraphIteratorNode(
            input="user_prompt & jsons",
            output=["results"],
            node_config={"graph_instance": xml_graph_template},
        )

        merge_results = MergeAnswersNode(
            input="user_prompt & results",
            output=["answer"],
            node_config={"llm_model": self.llm_model, "schema": self.schema},
        )

        pipeline_nodes = [iterate_sources, merge_results]
        pipeline_edges = [(iterate_sources, merge_results)]
        return BaseGraph(
            nodes=pipeline_nodes,
            edges=pipeline_edges,
            entry_point=iterate_sources,
        )

    def run(self) -> str:
        """
        Executes the graph on the stored prompt and sources.

        Stores the outcome of the execution in ``final_state`` and
        ``execution_info``.

        Returns:
            str: The "answer" entry of the final state, or
            "No answer found." when that entry is missing.
        """
        initial_state = {"user_prompt": self.prompt, "jsons": self.source}
        final_state, execution_info = self.graph.execute(initial_state)
        self.final_state = final_state
        self.execution_info = execution_info

        return self.final_state.get("answer", "No answer found.")
|
||||
@ -19,4 +19,4 @@ from .generate_answer_csv_node import GenerateAnswerCSVNode
|
||||
from .generate_answer_pdf_node import GenerateAnswerPDFNode
|
||||
from .graph_iterator_node import GraphIteratorNode
|
||||
from .merge_answers_node import MergeAnswersNode
|
||||
from .generate_answer_omni_node import GenerateAnswerOmniNode
|
||||
from .generate_answer_omni_node import GenerateAnswerOmniNode
|
||||
|
||||
@ -49,7 +49,7 @@ class GenerateAnswerCSVNode(BaseNode):
|
||||
input: str,
|
||||
output: List[str],
|
||||
node_config: Optional[dict] = None,
|
||||
node_name: str = "GenerateAnswer",
|
||||
node_name: str = "GenerateAnswerCSV",
|
||||
):
|
||||
"""
|
||||
Initializes the GenerateAnswerNodeCsv with a language model client and a node name.
|
||||
|
||||
@ -48,7 +48,7 @@ class GenerateAnswerPDFNode(BaseNode):
|
||||
input: str,
|
||||
output: List[str],
|
||||
node_config: Optional[dict] = None,
|
||||
node_name: str = "GenerateAnswer",
|
||||
node_name: str = "GenerateAnswerPDF",
|
||||
):
|
||||
"""
|
||||
Initializes the GenerateAnswerNodePDF with a language model client and a node name.
|
||||
@ -96,9 +96,7 @@ class GenerateAnswerPDFNode(BaseNode):
|
||||
output_parser = JsonOutputParser()
|
||||
format_instructions = output_parser.get_format_instructions()
|
||||
|
||||
|
||||
chains_dict = {}
|
||||
|
||||
# Use tqdm to add progress bar
|
||||
for i, chunk in enumerate(
|
||||
tqdm(doc, desc="Processing chunks", disable=not self.verbose)
|
||||
@ -108,7 +106,7 @@ class GenerateAnswerPDFNode(BaseNode):
|
||||
template=template_no_chunks_pdf,
|
||||
input_variables=["question"],
|
||||
partial_variables={
|
||||
"context": chunk.page_content,
|
||||
"context":chunk,
|
||||
"format_instructions": format_instructions,
|
||||
},
|
||||
)
|
||||
@ -117,7 +115,7 @@ class GenerateAnswerPDFNode(BaseNode):
|
||||
template=template_chunks_pdf,
|
||||
input_variables=["question"],
|
||||
partial_variables={
|
||||
"context": chunk.page_content,
|
||||
"context":chunk,
|
||||
"chunk_id": i + 1,
|
||||
"format_instructions": format_instructions,
|
||||
},
|
||||
|
||||
@ -10,7 +10,6 @@ from langchain.prompts import PromptTemplate
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.runnables import RunnableParallel
|
||||
from tqdm import tqdm
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
# Imports from the library
|
||||
|
||||
@ -3,10 +3,8 @@ GetProbableTagsNode Module
|
||||
"""
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain.output_parsers import CommaSeparatedListOutputParser
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
from .base_node import BaseNode
|
||||
|
||||
|
||||
@ -47,7 +47,7 @@ class RobotsNode(BaseNode):
|
||||
input: str,
|
||||
output: List[str],
|
||||
node_config: Optional[dict] = None,
|
||||
node_name: str = "Robots",
|
||||
node_name: str = "RobotNode",
|
||||
|
||||
):
|
||||
super().__init__(node_name, "node", input, output, 1)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user