diff --git a/examples/benchmarks/SmartScraper/benchmark_openai_gpt4o.py b/examples/benchmarks/SmartScraper/benchmark_openai_gpt4o.py
new file mode 100644
index 00000000..aa273c5b
--- /dev/null
+++ b/examples/benchmarks/SmartScraper/benchmark_openai_gpt4o.py
@@ -0,0 +1,53 @@
+"""
+Basic example of scraping pipeline using SmartScraper from text
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import SmartScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+load_dotenv()
+
+# ************************************************
+# Read the text file
+# ************************************************
+files = ["inputs/example_1.txt", "inputs/example_2.txt"]
+tasks = ["List me all the projects with their description.",
+         "List me all the articles with their description."]
+
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+openai_key = os.getenv("OPENAI_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": openai_key,
+        "model": "gpt-4o",
+    },
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+for i in range(0, 2):
+    with open(files[i], 'r', encoding="utf-8") as file:
+        text = file.read()
+
+    smart_scraper_graph = SmartScraperGraph(
+        prompt=tasks[i],
+        source=text,
+        config=graph_config
+    )
+
+    result = smart_scraper_graph.run()
+    print(result)
+    # ************************************************
+    # Get graph execution info
+    # ************************************************
+
+    graph_exec_info = smart_scraper_graph.get_execution_info()
+    print(prettify_exec_info(graph_exec_info))
diff --git a/examples/local_models/smart_scraper_ollama.py b/examples/local_models/smart_scraper_ollama.py
index 8c17ffa6..13fd7d12 100644
--- a/examples/local_models/smart_scraper_ollama.py
+++ b/examples/local_models/smart_scraper_ollama.py
@@ -28,7 +28,7 @@ graph_config = {
 # ************************************************
 
 smart_scraper_graph = SmartScraperGraph(
-    prompt="List me all the titles",
+    prompt="List me all the titles of the articles",
     # also accepts a string with the already downloaded HTML code
     source="https://www.wired.com/",
     config=graph_config