working speech graph with rag

This commit is contained in:
PeriniM 2024-02-23 14:17:46 +01:00
parent 486294e5c2
commit ed64db24df
5 changed files with 13 additions and 24 deletions

View File

@ -4,7 +4,6 @@ Module for evaluating the graph
import os
from scrapegraphai.evaluators import TrulensEvaluator
from dotenv import load_dotenv
import pandas as pd
load_dotenv()
@ -19,9 +18,9 @@ llm_config = {
list_of_inputs = [
("List me all the titles and project descriptions",
"https://perinim.github.io/projects/", llm_config),
("Who is the author of the project?",
"https://perinim.github.io/projects/", llm_config),
("What is the project about?", "https://perinim.github.io/projects/", llm_config)
# ("Who is the author of the project?",
# "https://perinim.github.io/projects/", llm_config),
# ("What is the project about?", "https://perinim.github.io/projects/", llm_config),
]
# Create the TrulensEvaluator instance
@ -29,5 +28,4 @@ trulens_evaluator = TrulensEvaluator(openai_key)
# Evaluate SmartScraperGraph on the list of inputs
(results_df, answer) = trulens_evaluator.evaluate(list_of_inputs, dashboard=False)
# Print the results
print(answer)

View File

@ -17,7 +17,7 @@ llm_config = {
# Define URL and PROMPT
URL = "https://perinim.github.io/projects/"
PROMPT = "List me all the titles and project descriptions"
PROMPT = "List me all the titles and project descriptions and give me an audio"
# Create the SmartScraperGraph instance
smart_scraper_graph = SmartScraperGraph(PROMPT, URL, llm_config)

View File

@ -18,9 +18,9 @@ llm_config = {
curr_dir = os.path.dirname(os.path.realpath(__file__))
output_file_path = os.path.join(curr_dir, "website_summary.mp3")
speech_summary_graph = SpeechSummaryGraph("""Make a summary of the webpage to be
speech_summary_graph = SpeechSummaryGraph("""Make a summary of the news to be
converted to audio for blind people.""",
"https://perinim.github.io/projects/", llm_config,
"https://www.wired.com/category/science/", llm_config,
output_file_path)
final_state = speech_summary_graph.run()

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "scrapegraphai"
version = "0.0.6"
version = "0.0.6-alpha"
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
authors = [
"Marco Vinciguerra <mvincig11@gmail.com>",

View File

@ -6,10 +6,8 @@ from ..models import OpenAI, OpenAITextToSpeech
from .base_graph import BaseGraph
from ..nodes import (
FetchHTMLNode,
ConditionalNode,
GetProbableTagsNode,
RAGNode,
GenerateAnswerNode,
ParseHTMLNode,
TextToSpeechNode,
)
@ -36,7 +34,7 @@ class SpeechSummaryGraph:
output_path (str): The file path where the generated MP3 should be saved.
"""
def __init__(self, prompt: str, url: str, llm_config: dict, output_path: str):
def __init__(self, prompt: str, url: str, llm_config: dict, output_path: str = "website_summary.mp3"):
"""
Initializes the SpeechSummaryGraph with a prompt, URL, language model configuration, and output path.
"""
@ -80,28 +78,21 @@ class SpeechSummaryGraph:
BaseGraph: An instance of the BaseGraph class.
"""
fetch_html_node = FetchHTMLNode("fetch_html")
get_probable_tags_node = GetProbableTagsNode(
self.llm, "get_probable_tags")
parse_document_node = ParseHTMLNode("parse_document")
rag_node = RAGNode(self.llm, "rag")
generate_answer_node = GenerateAnswerNode(self.llm, "generate_answer")
conditional_node = ConditionalNode(
"conditional", [parse_document_node, generate_answer_node])
text_to_speech_node = TextToSpeechNode(
self.text_to_speech_model, "text_to_speech")
return BaseGraph(
nodes={
fetch_html_node,
get_probable_tags_node,
conditional_node,
parse_document_node,
rag_node,
generate_answer_node,
text_to_speech_node
},
edges={
(fetch_html_node, get_probable_tags_node),
(get_probable_tags_node, conditional_node),
(parse_document_node, generate_answer_node),
(fetch_html_node, rag_node),
(rag_node, generate_answer_node),
(generate_answer_node, text_to_speech_node)
},
entry_point=fetch_html_node