"""
Basic example of scraping pipeline using SpeechSummaryGraph
"""

import os

from dotenv import load_dotenv

from scrapegraphai.graphs import SpeechSummaryGraph

load_dotenv()

# Define the configuration for the language model.
# os.getenv returns None when the variable is unset; stop here with a clear
# message instead of handing a None key to the graph and failing later
# inside the model client.
openai_key = os.getenv("OPENAI_APIKEY")
if not openai_key:
    raise SystemExit(
        "OPENAI_APIKEY is not set. Export it or add it to your .env file."
    )

llm_config = {
    "api_key": openai_key,
}

# The generated audio is saved next to this script.
curr_dir = os.path.dirname(os.path.realpath(__file__))
output_file_path = os.path.join(curr_dir, "website_summary.mp3")

speech_summary_graph = SpeechSummaryGraph(
    "Make a summary of the webpage to be converted to speech for blind people.",
    "https://perinim.github.io/projects/",
    llm_config,
    output_file_path,
)

# run() writes the audio file and returns the final graph state (a dict).
final_state = speech_summary_graph.run()
print(final_state.get("answer", "No answer found."))
class SpeechSummaryGraph:
    """
    SpeechSummaryGraph scrapes a web page, asks a language model to summarize it
    and converts that summary to speech, saving the audio to a file.

    Attributes:
        prompt (str): The user prompt, extended with an instruction to store the
            summary under the 'summary' key of the answer.
        url (str): The URL of the web page to scrape and summarize.
        llm_config (dict): Configuration parameters for the language model,
            with 'api_key' mandatory.
        llm: The language model wrapper built by _create_llm().
        output_path (str): The path where the generated audio file is saved.
        text_to_speech_model: The OpenAITextToSpeech model ("tts-1", voice "alloy").
        graph (BaseGraph): The node graph built by _create_graph().

    Methods:
        run(): Executes the web scraping, summarization, and text-to-speech process.

    Args:
        prompt (str): The prompt used to guide the summarization process.
        url (str): The URL of the web page to scrape and summarize.
        llm_config (dict): A dictionary containing configuration options for the
            language model.
        output_path (str): The file path where the generated audio should be saved.
    """

    def __init__(self, prompt: str, url: str, llm_config: dict, output_path: str):
        """
        Initializes the SpeechSummaryGraph with a prompt, URL, language model
        configuration and output path.

        Raises:
            ValueError: If llm_config does not provide a usable 'api_key'.
        """
        # The text-to-speech node reads the 'summary' key of the answer, so the
        # model is told explicitly to produce it.
        self.prompt = f"{prompt} - Save the summary in a key called 'summary'."
        self.url = url
        self.llm_config = llm_config
        # Built first so that a bad configuration fails before anything else
        # is constructed.
        self.llm = self._create_llm()
        self.output_path = output_path
        self.text_to_speech_model = OpenAITextToSpeech(
            llm_config, model="tts-1", voice="alloy")
        self.graph = self._create_graph()

    def _create_llm(self):
        """
        Creates the language model wrapper from the provided configuration,
        filling in defaults for any option that was not given.

        Returns:
            OpenAI: An instance of the project's OpenAI model wrapper.

        Raises:
            ValueError: If 'api_key' is missing, None or empty in llm_config.
        """
        llm_defaults = {
            "model_name": "gpt-3.5-turbo",
            "temperature": 0,
            "streaming": True
        }
        # User-supplied values override the defaults; a None config is treated
        # as empty so it reaches the api_key check below instead of a TypeError.
        llm_params = {**llm_defaults, **(self.llm_config or {})}
        # A key that is present but None/empty is as unusable as a missing one
        # (e.g. os.getenv returned None), so check the value, not just the key.
        if not llm_params.get("api_key"):
            raise ValueError("LLM configuration must include an 'api_key'.")
        return OpenAI(llm_params)

    def _create_graph(self):
        """
        Creates the graph of nodes representing the workflow for scraping,
        summarizing and converting the summary to speech.

        Returns:
            BaseGraph: An instance of the BaseGraph class.
        """
        fetch_html_node = FetchHTMLNode("fetch_html")
        get_probable_tags_node = GetProbableTagsNode(
            self.llm, "get_probable_tags")
        parse_document_node = ParseHTMLNode("parse_document")
        generate_answer_node = GenerateAnswerNode(self.llm, "generate_answer")
        # The conditional node is handed its two candidate successors directly,
        # so it has no outgoing entries in the edge set below.
        conditional_node = ConditionalNode(
            "conditional", [parse_document_node, generate_answer_node])
        text_to_speech_node = TextToSpeechNode(
            self.text_to_speech_model, "text_to_speech")

        return BaseGraph(
            nodes={
                fetch_html_node,
                get_probable_tags_node,
                conditional_node,
                parse_document_node,
                generate_answer_node,
                text_to_speech_node
            },
            edges={
                (fetch_html_node, get_probable_tags_node),
                (get_probable_tags_node, conditional_node),
                (parse_document_node, generate_answer_node),
                (generate_answer_node, text_to_speech_node)
            },
            entry_point=fetch_html_node
        )

    def run(self) -> dict:
        """
        Executes the graph, saves the generated audio to output_path and
        returns the final state.

        Returns:
            dict: The final state produced by the graph; it holds the generated
                audio under 'audio'.

        Raises:
            ValueError: If the graph did not produce any audio.
        """
        inputs = {"user_input": self.prompt, "url": self.url}
        final_state = self.graph.execute(inputs)

        audio = final_state.get("audio", None)
        if not audio:
            raise ValueError("No audio generated from the text.")
        save_audio_from_bytes(audio, self.output_path)
        print(f"Audio saved to {self.output_path}")

        return final_state
""" + + print("---GENERATING TEXT FROM IMAGE---") text_answer = self.llm.run(url) state.update({"image_text": text_answer}) diff --git a/scrapegraphai/nodes/text_to_speech_node.py b/scrapegraphai/nodes/text_to_speech_node.py index 127a9d7d..5d4da4cf 100644 --- a/scrapegraphai/nodes/text_to_speech_node.py +++ b/scrapegraphai/nodes/text_to_speech_node.py @@ -23,7 +23,7 @@ class TextToSpeechNode(BaseNode): super().__init__(node_name, "node") self.llm = llm - def execute(self, state: dict, text: str) -> dict: + def execute(self, state: dict, text: str | None = None) -> dict: """ Execute the node's logic and return the updated state. Args: @@ -33,7 +33,12 @@ class TextToSpeechNode(BaseNode): :return: The updated state after executing this node. """ - audio = self.llm.run(text) + text2translate = state.get("answer", None) + if not text2translate: + raise ValueError("No text to translate to speech.") + + print("---TRANSLATING TEXT TO SPEECH---") + audio = self.llm.run(text2translate["summary"]) state.update({"audio": audio}) return state