From ee533b8d74775821ec0d16d6bc57f4e9184bb8cb Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Tue, 9 Apr 2024 11:43:51 +0200 Subject: [PATCH] add examples for local models and gemini models --- examples/gemini/scrape_plain_text_gemini.py | 53 +++++++++++++++++++ examples/gemini/scrape_xml_gemini.py | 52 ++++++++++++++++++ examples/gemini/search_graph_gemini.py | 40 ++++++++++++++ examples/local/scrape_plain_text_local.py | 48 +++++++++++++++++ examples/local/scrape_xml_local.py | 47 ++++++++++++++++ examples/local/search_graph_local.py | 35 ++++++++++++ examples/local/smart_scraper_local.py | 2 +- ...ocument_openai.py => scrape_xml_openai.py} | 0 examples/openai/search_graph_openai.py | 4 -- 9 files changed, 276 insertions(+), 5 deletions(-) create mode 100644 examples/gemini/scrape_plain_text_gemini.py create mode 100644 examples/gemini/scrape_xml_gemini.py create mode 100644 examples/gemini/search_graph_gemini.py create mode 100644 examples/local/scrape_plain_text_local.py create mode 100644 examples/local/scrape_xml_local.py create mode 100644 examples/local/search_graph_local.py rename examples/openai/{scrape_xml_document_openai.py => scrape_xml_openai.py} (100%) diff --git a/examples/gemini/scrape_plain_text_gemini.py b/examples/gemini/scrape_plain_text_gemini.py new file mode 100644 index 00000000..43c4619f --- /dev/null +++ b/examples/gemini/scrape_plain_text_gemini.py @@ -0,0 +1,53 @@ +""" +Basic example of scraping pipeline using SmartScraper from text +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json +load_dotenv() + +# ************************************************ +# Read the text file +# ************************************************ + +FILE_NAME = "inputs/plain_html_example.txt" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +# It could also be an HTTP request using the 
requests library +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +gemini_key = os.getenv("GOOGLE_APIKEY") + +graph_config = { + "llm": { + "api_key": gemini_key, + "model": "gemini-pro", + "temperature": 0, + "streaming": True + }, +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = SmartScraperGraph( + prompt="List me all the news with their description.", + source=text, + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# Save to json and csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/gemini/scrape_xml_gemini.py b/examples/gemini/scrape_xml_gemini.py new file mode 100644 index 00000000..7891f04e --- /dev/null +++ b/examples/gemini/scrape_xml_gemini.py @@ -0,0 +1,52 @@ +""" +Basic example of scraping pipeline using SmartScraper from XML documents +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import SmartScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json +load_dotenv() + +# ************************************************ +# Read the XML file +# ************************************************ + +FILE_NAME = "inputs/books.xml" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +gemini_key = os.getenv("GOOGLE_APIKEY") + +graph_config = { + "llm": { + "api_key": gemini_key, + "model": "gemini-pro", + "temperature": 0, + "streaming": True + }, +} + +# 
************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = SmartScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# Save to json and csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/gemini/search_graph_gemini.py b/examples/gemini/search_graph_gemini.py new file mode 100644 index 00000000..d213cf38 --- /dev/null +++ b/examples/gemini/search_graph_gemini.py @@ -0,0 +1,40 @@ +""" +Example of Search Graph +""" + +import os +from dotenv import load_dotenv +from scrapegraphai.graphs import SearchGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +gemini_key = os.getenv("GOOGLE_APIKEY") + +graph_config = { + "llm": { + "api_key": gemini_key, + "model": "gemini-pro", + "temperature": 0, + "streaming": True + }, +} + +# ************************************************ +# Create the SearchGraph instance and run it +# ************************************************ + +search_graph = SearchGraph( + prompt="List me all the regions of Italy.", + config=graph_config +) + +result = search_graph.run() +print(result) + +# Save to json and csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local/scrape_plain_text_local.py b/examples/local/scrape_plain_text_local.py new file mode 100644 index 00000000..186c788c --- /dev/null +++ b/examples/local/scrape_plain_text_local.py @@ -0,0 +1,48 @@ +""" +Basic example of scraping pipeline using SmartScraper from text +""" + +import os +from scrapegraphai.graphs import 
SmartScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json + +# ************************************************ +# Read the text file +# ************************************************ + +FILE_NAME = "inputs/plain_html_example.txt" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +# It could also be an HTTP request using the requests library +with open(file_path, 'r', encoding="utf-8") as file: + text = file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + }, +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = SmartScraperGraph( + prompt="List me all the news with their description.", + source=text, + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# Save to json and csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local/scrape_xml_local.py b/examples/local/scrape_xml_local.py new file mode 100644 index 00000000..6bc0ac40 --- /dev/null +++ b/examples/local/scrape_xml_local.py @@ -0,0 +1,47 @@ +""" +Basic example of scraping pipeline using SmartScraper from XML documents +""" +import os +from scrapegraphai.graphs import SmartScraperGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json + +# ************************************************ +# Read the XML file +# ************************************************ + +FILE_NAME = "inputs/books.xml" +curr_dir = os.path.dirname(os.path.realpath(__file__)) +file_path = os.path.join(curr_dir, FILE_NAME) + +with open(file_path, 'r', encoding="utf-8") as file: + text = 
file.read() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + }, +} + +# ************************************************ +# Create the SmartScraperGraph instance and run it +# ************************************************ + +smart_scraper_graph = SmartScraperGraph( + prompt="List me all the authors, title and genres of the books", + source=text, # Pass the content of the file, not the file object + config=graph_config +) + +result = smart_scraper_graph.run() +print(result) + +# Save to json and csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local/search_graph_local.py b/examples/local/search_graph_local.py new file mode 100644 index 00000000..15cc3aee --- /dev/null +++ b/examples/local/search_graph_local.py @@ -0,0 +1,35 @@ +""" +Example of Search Graph +""" + +from scrapegraphai.graphs import SearchGraph +from scrapegraphai.utils import convert_to_csv, convert_to_json + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + + +graph_config = { + "llm": { + "model": "ollama/mistral", + "temperature": 0, + "format": "json", # Ollama needs the format to be specified explicitly + }, +} + +# ************************************************ +# Create the SearchGraph instance and run it +# ************************************************ + +search_graph = SearchGraph( + prompt="List me all the regions of Italy.", + config=graph_config +) + +result = search_graph.run() +print(result) + +# Save to json and csv +convert_to_csv(result, "result") +convert_to_json(result, "result") diff --git a/examples/local/smart_scraper_local.py b/examples/local/smart_scraper_local.py index 
f474c15f..f9b639a3 100644 --- a/examples/local/smart_scraper_local.py +++ b/examples/local/smart_scraper_local.py @@ -30,7 +30,7 @@ graph_config = { smart_scraper_graph = SmartScraperGraph( prompt="List me all the news with their description.", # also accepts a string with the already downloaded HTML code - source="https://www.wired.com", + source="https://perinim.github.io/projects", config=graph_config ) diff --git a/examples/openai/scrape_xml_document_openai.py b/examples/openai/scrape_xml_openai.py similarity index 100% rename from examples/openai/scrape_xml_document_openai.py rename to examples/openai/scrape_xml_openai.py diff --git a/examples/openai/search_graph_openai.py b/examples/openai/search_graph_openai.py index 23322110..63217c7f 100644 --- a/examples/openai/search_graph_openai.py +++ b/examples/openai/search_graph_openai.py @@ -19,10 +19,6 @@ graph_config = { "api_key": openai_key, "model": "gpt-3.5-turbo", }, - "embeddings": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", - }, } # ************************************************