working speech graph with rag

2026-06-23 21:00:30 +08:00 · 2024-02-23 14:17:46 +01:00 · 2024-02-23 14:17:46 +01:00 · ed64db24df
commit ed64db24df
parent 486294e5c2
5 changed files with 13 additions and 24 deletions
--- a/examples/graph_examples/graph_evaluation_example.py
+++ b/examples/graph_examples/graph_evaluation_example.py
@@ -4,7 +4,6 @@ Module for evaluating the graph
 import os
 from scrapegraphai.evaluators import TrulensEvaluator
 from dotenv import load_dotenv
-import pandas as pd

 load_dotenv()

@@ -19,9 +18,9 @@ llm_config = {
 list_of_inputs = [
    ("List me all the titles and project descriptions",
     "https://perinim.github.io/projects/", llm_config),
-    ("Who is the author of the project?",
-     "https://perinim.github.io/projects/", llm_config),
-    ("What is the project about?", "https://perinim.github.io/projects/", llm_config)
+    # ("Who is the author of the project?",
+    #  "https://perinim.github.io/projects/", llm_config),
+    # ("What is the project about?", "https://perinim.github.io/projects/", llm_config),
 ]

 # Create the TrulensEvaluator instance
@@ -29,5 +28,4 @@ trulens_evaluator = TrulensEvaluator(openai_key)
 # Evaluate SmartScraperGraph on the list of inputs
 (results_df, answer) = trulens_evaluator.evaluate(list_of_inputs, dashboard=False)

-# Print the results
 print(answer)
--- a/examples/graph_examples/smart_scraper_example.py
+++ b/examples/graph_examples/smart_scraper_example.py
@@ -17,7 +17,7 @@ llm_config = {

 # Define URL and PROMPT
 URL = "https://perinim.github.io/projects/"
-PROMPT = "List me all the titles and project descriptions"
+PROMPT = "List me all the titles and project descriptions and give me an audio"

 # Create the SmartScraperGraph instance
 smart_scraper_graph = SmartScraperGraph(PROMPT, URL, llm_config)
--- a/examples/graph_examples/speech_summary_graph_example.py
+++ b/examples/graph_examples/speech_summary_graph_example.py
@@ -18,9 +18,9 @@ llm_config = {
 curr_dir = os.path.dirname(os.path.realpath(__file__))
 output_file_path = os.path.join(curr_dir, "website_summary.mp3")

-speech_summary_graph = SpeechSummaryGraph("""Make a summary of the webpage to be
+speech_summary_graph = SpeechSummaryGraph("""Make a summary of the news to be
 converted to audio for blind people.""",
-                                          "https://perinim.github.io/projects/", llm_config,
+                                          "https://www.wired.com/category/science/", llm_config,
                                          output_file_path)

 final_state = speech_summary_graph.run()
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scrapegraphai"
-version = "0.0.6"
+version = "0.0.6-alpha"
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
 authors = [
    "Marco Vinciguerra <mvincig11@gmail.com>",
--- a/scrapegraphai/graphs/speech_summary_graph.py
+++ b/scrapegraphai/graphs/speech_summary_graph.py
@@ -6,10 +6,8 @@ from ..models import OpenAI, OpenAITextToSpeech
 from .base_graph import BaseGraph
 from ..nodes import (
    FetchHTMLNode,
-    ConditionalNode,
-    GetProbableTagsNode,
+    RAGNode,
    GenerateAnswerNode,
-    ParseHTMLNode,
    TextToSpeechNode,
 )

@@ -36,7 +34,7 @@ class SpeechSummaryGraph:
        output_path (str): The file path where the generated MP3 should be saved.
    """

-    def __init__(self, prompt: str, url: str, llm_config: dict, output_path: str):
+    def __init__(self, prompt: str, url: str, llm_config: dict, output_path: str = "website_summary.mp3"):
        """
        Initializes the SmartScraper with a prompt, URL, and language model configuration.
        """
@@ -80,28 +78,21 @@ class SpeechSummaryGraph:
            BaseGraph: An instance of the BaseGraph class.
        """
        fetch_html_node = FetchHTMLNode("fetch_html")
-        get_probable_tags_node = GetProbableTagsNode(
-            self.llm, "get_probable_tags")
-        parse_document_node = ParseHTMLNode("parse_document")
+        rag_node = RAGNode(self.llm, "rag")
        generate_answer_node = GenerateAnswerNode(self.llm, "generate_answer")
-        conditional_node = ConditionalNode(
-            "conditional", [parse_document_node, generate_answer_node])
        text_to_speech_node = TextToSpeechNode(
            self.text_to_speech_model, "text_to_speech")

        return BaseGraph(
            nodes={
                fetch_html_node,
-                get_probable_tags_node,
-                conditional_node,
-                parse_document_node,
+                rag_node,
                generate_answer_node,
                text_to_speech_node
            },
            edges={
-                (fetch_html_node, get_probable_tags_node),
-                (get_probable_tags_node, conditional_node),
-                (parse_document_node, generate_answer_node),
+                (fetch_html_node, rag_node),
+                (rag_node, generate_answer_node),
                (generate_answer_node, text_to_speech_node)
            },
            entry_point=fetch_html_node