diff --git a/examples/graph_examples/graph_evaluation_example.py b/examples/graph_examples/graph_evaluation_example.py index 4691654d..d55da3b8 100644 --- a/examples/graph_examples/graph_evaluation_example.py +++ b/examples/graph_examples/graph_evaluation_example.py @@ -4,7 +4,6 @@ Module for evaluating the graph import os from scrapegraphai.evaluators import TrulensEvaluator from dotenv import load_dotenv -import pandas as pd load_dotenv() @@ -19,9 +18,9 @@ llm_config = { list_of_inputs = [ ("List me all the titles and project descriptions", "https://perinim.github.io/projects/", llm_config), - ("Who is the author of the project?", - "https://perinim.github.io/projects/", llm_config), - ("What is the project about?", "https://perinim.github.io/projects/", llm_config) + # ("Who is the author of the project?", + # "https://perinim.github.io/projects/", llm_config), + # ("What is the project about?", "https://perinim.github.io/projects/", llm_config), ] # Create the TrulensEvaluator instance @@ -29,5 +28,4 @@ trulens_evaluator = TrulensEvaluator(openai_key) # Evaluate SmartScraperGraph on the list of inputs (results_df, answer) = trulens_evaluator.evaluate(list_of_inputs, dashboard=False) -# Print the results print(answer) diff --git a/examples/graph_examples/smart_scraper_example.py b/examples/graph_examples/smart_scraper_example.py index adf949c9..8de9246f 100644 --- a/examples/graph_examples/smart_scraper_example.py +++ b/examples/graph_examples/smart_scraper_example.py @@ -17,7 +17,7 @@ llm_config = { # Define URL and PROMPT URL = "https://perinim.github.io/projects/" -PROMPT = "List me all the titles and project descriptions" +PROMPT = "List me all the titles and project descriptions and give me an audio" # Create the SmartScraperGraph instance smart_scraper_graph = SmartScraperGraph(PROMPT, URL, llm_config) diff --git a/examples/graph_examples/speech_summary_graph_example.py b/examples/graph_examples/speech_summary_graph_example.py index 406b2c88..eff068ea 100644 --- a/examples/graph_examples/speech_summary_graph_example.py +++ b/examples/graph_examples/speech_summary_graph_example.py @@ -18,9 +18,9 @@ llm_config = { curr_dir = os.path.dirname(os.path.realpath(__file__)) output_file_path = os.path.join(curr_dir, "website_summary.mp3") -speech_summary_graph = SpeechSummaryGraph("""Make a summary of the webpage to be +speech_summary_graph = SpeechSummaryGraph("""Make a summary of the news to be converted to audio for blind people.""", - "https://perinim.github.io/projects/", llm_config, + "https://www.wired.com/category/science/", llm_config, output_file_path) final_state = speech_summary_graph.run() diff --git a/pyproject.toml b/pyproject.toml index ab0b9207..e4d4f7b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scrapegraphai" -version = "0.0.6" +version = "0.0.6-alpha" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." authors = [ "Marco Vinciguerra ", diff --git a/scrapegraphai/graphs/speech_summary_graph.py b/scrapegraphai/graphs/speech_summary_graph.py index 4cf3a32e..e6b83e74 100644 --- a/scrapegraphai/graphs/speech_summary_graph.py +++ b/scrapegraphai/graphs/speech_summary_graph.py @@ -6,10 +6,8 @@ from ..models import OpenAI, OpenAITextToSpeech from .base_graph import BaseGraph from ..nodes import ( FetchHTMLNode, - ConditionalNode, - GetProbableTagsNode, + RAGNode, GenerateAnswerNode, - ParseHTMLNode, TextToSpeechNode, ) @@ -36,7 +34,7 @@ class SpeechSummaryGraph: output_path (str): The file path where the generated MP3 should be saved. """ - def __init__(self, prompt: str, url: str, llm_config: dict, output_path: str): + def __init__(self, prompt: str, url: str, llm_config: dict, output_path: str = "website_summary.mp3"): """ Initializes the SmartScraper with a prompt, URL, and language model configuration. """ @@ -80,28 +78,21 @@ class SpeechSummaryGraph: BaseGraph: An instance of the BaseGraph class. """ fetch_html_node = FetchHTMLNode("fetch_html") - get_probable_tags_node = GetProbableTagsNode( - self.llm, "get_probable_tags") - parse_document_node = ParseHTMLNode("parse_document") + rag_node = RAGNode(self.llm, "rag") generate_answer_node = GenerateAnswerNode(self.llm, "generate_answer") - conditional_node = ConditionalNode( - "conditional", [parse_document_node, generate_answer_node]) text_to_speech_node = TextToSpeechNode( self.text_to_speech_model, "text_to_speech") return BaseGraph( nodes={ fetch_html_node, - get_probable_tags_node, - conditional_node, - parse_document_node, + rag_node, generate_answer_node, text_to_speech_node }, edges={ - (fetch_html_node, get_probable_tags_node), - (get_probable_tags_node, conditional_node), - (parse_document_node, generate_answer_node), + (fetch_html_node, rag_node), + (rag_node, generate_answer_node), (generate_answer_node, text_to_speech_node) }, entry_point=fetch_html_node