docs: refactor nodes docstrings

This commit is contained in:
Marco Perini 2024-05-01 23:17:57 +02:00
parent e9817963c8
commit 1409797475
12 changed files with 192 additions and 291 deletions

View File

@ -1,5 +1,5 @@
"""
Module for defining BaseNode, an abstract base class for nodes in a graph-based workflow.
BaseNode Module
"""
from abc import ABC, abstractmethod

View File

@ -1,5 +1,5 @@
"""
Module for fetching the HTML node
FetchNode Module
"""
from typing import List, Optional
@ -27,10 +27,6 @@ class FetchNode(BaseNode):
output (List[str]): List of output keys to be updated in the state.
node_config (Optional[dict]): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "Fetch".
Methods:
execute(state): Fetches the HTML content for the URL specified in the state
and updates the state with the fetched content under the specified output key.
"""
def __init__(self, input: str, output: List[str], node_config: Optional[dict], node_name: str = "Fetch"):
@ -45,13 +41,14 @@ class FetchNode(BaseNode):
update the state with this content.
Args:
state (dict): The current state of the graph, expected to contain a 'url' key.
state (dict): The current state of the graph. The input keys will be used
to fetch the correct data from the state.
Returns:
dict: The updated state with a new 'document' key containing the fetched HTML content.
dict: The updated state with a new output key containing the fetched HTML content.
Raises:
KeyError: If the 'url' key is not found in the state, indicating that the
KeyError: If the input key is not found in the state, indicating that the
necessary information to perform the operation is missing.
"""
if self.verbose:

View File

@ -1,6 +1,7 @@
"""
Module for generating the answer node
GenerateAnswerNode Module
"""
# Imports from standard library
from typing import List
from tqdm import tqdm
@ -16,57 +17,43 @@ from .base_node import BaseNode
class GenerateAnswerNode(BaseNode):
"""
A node that generates an answer using a language model (LLM) based on the user's input
A node that generates an answer using a large language model (LLM) based on the user's input
and the content extracted from a webpage. It constructs a prompt from the user's input
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
an answer.
Attributes:
llm: An instance of a language model client, configured for generating answers.
node_name (str): The unique identifier name for the node, defaulting
to "GenerateAnswerNode".
node_type (str): The type of the node, set to "node" indicating a
standard operational node.
llm_model: An instance of a language model client, configured for generating answers.
verbose (bool): A flag indicating whether to show print statements during execution.
Args:
llm: An instance of the language model client (e.g., ChatOpenAI) used
for generating answers.
node_name (str, optional): The unique identifier name for the node.
Defaults to "GenerateAnswerNode".
Methods:
execute(state): Processes the input and document from the state to generate an answer,
updating the state with the generated answer under the 'answer' key.
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswer".
"""
def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "GenerateAnswer"):
"""
Initializes the GenerateAnswerNode with a language model client and a node name.
Args:
llm: An instance of the OpenAIImageToText class.
node_name (str): name of the node
"""
super().__init__(node_name, "node", input, output, 2, node_config)
self.llm_model = node_config["llm"]
self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Generates an answer by constructing a prompt from the user's input and the scraped
content, querying the language model, and parsing its response.
The method updates the state with the generated answer under the 'answer' key.
Args:
state (dict): The current state of the graph, expected to contain 'user_input',
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
state (dict): The current state of the graph. The input keys will be used
to fetch the correct data from the state.
Returns:
dict: The updated state with the 'answer' key containing the generated answer.
dict: The updated state with the output key containing the generated answer.
Raises:
KeyError: If 'user_input' or 'document' is not found in the state, indicating
KeyError: If the input keys are not found in the state, indicating
that the necessary information for generating an answer is missing.
"""

View File

@ -1,6 +1,7 @@
"""
Module for generating the answer node
GenerateScraperNode Module
"""
# Imports from standard library
from typing import List
from tqdm import tqdm
@ -16,58 +17,46 @@ from .base_node import BaseNode
class GenerateScraperNode(BaseNode):
"""
A node that generates an answer using a language model (LLM) based on the user's input
and the content extracted from a webpage. It constructs a prompt from the user's input
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
an answer.
Generates a python script for scraping a website using the specified library.
It takes the user's prompt and the scraped content as input and generates a python script
that extracts the information requested by the user.
Attributes:
llm: An instance of a language model client, configured for generating answers.
node_name (str): The unique identifier name for the node, defaulting
to "GenerateScraperNode".
node_type (str): The type of the node, set to "node" indicating a
standard operational node.
llm_model: An instance of a language model client, configured for generating answers.
library (str): The python library to use for scraping the website.
source (str): The website to scrape.
Args:
llm: An instance of the language model client (e.g., ChatOpenAI) used
for generating answers.
node_name (str, optional): The unique identifier name for the node.
Defaults to "GenerateScraperNode".
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
library (str): The python library to use for scraping the website.
website (str): The website to scrape.
node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswer".
Methods:
execute(state): Processes the user's prompt and the scraped content from the state to
generate a python scraping script, updating the state with it under the output key.
"""
def __init__(self, input: str, output: List[str], node_config: dict,
library: str, website: str, node_name: str = "GenerateAnswer"):
"""
Initializes the GenerateScraperNode with a language model client and a node name.
Args:
llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
node_name (str): name of the node
"""
super().__init__(node_name, "node", input, output, 2, node_config)
self.llm_model = node_config["llm"]
self.library = library
self.source = website
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Generates an answer by constructing a prompt from the user's input and the scraped
content, querying the language model, and parsing its response.
The method updates the state with the generated answer under the 'answer' key.
Generates a python script for scraping a website using the specified library.
Args:
state (dict): The current state of the graph, expected to contain 'user_input',
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
state (dict): The current state of the graph. The input keys will be used
to fetch the correct data from the state.
Returns:
dict: The updated state with the 'answer' key containing the generated answer.
dict: The updated state with the output key containing the generated python script.
Raises:
KeyError: If 'user_input' or 'document' is not found in the state, indicating
KeyError: If input keys are not found in the state, indicating
that the necessary information for generating an answer is missing.
"""

View File

@ -1,6 +1,7 @@
"""
Module for proobable tags
GetProbableTagsNode Module
"""
from typing import List
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
@ -15,47 +16,36 @@ class GetProbableTagsNode(BaseNode):
list of probable tags.
Attributes:
llm: An instance of a language model client, configured for generating tag predictions.
node_name (str): The unique identifier name for the node,
defaulting to "GetProbableTagsNode".
node_type (str): The type of the node, set to "node" indicating a standard operational node.
llm_model: An instance of the language model client used for tag predictions.
Args:
llm: An instance of the language model client (e.g., ChatOpenAI) used for tag predictions.
node_name (str, optional): The unique identifier name for the node.
Defaults to "GetProbableTagsNode".
Methods:
execute(state): Processes the user's input and the URL from the state to generate a list of
probable HTML tags, updating the state with these tags under the 'tags' key.
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
model_config (dict): Additional configuration for the language model.
node_name (str): The unique identifier name for the node, defaulting to "GetProbableTags".
"""
def __init__(self, input: str, output: List[str], model_config: dict,
node_name: str = "GetProbableTags"):
"""
Initializes the GetProbableTagsNode with a language model client and a node name.
Args:
llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
node_name (str): name of the node
"""
super().__init__(node_name, "node", input, output, 2, model_config)
self.llm_model = model_config["llm_model"]
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Generates a list of probable HTML tags based on the user's input and updates the state
with this list. The method constructs a prompt for the language model, submits it, and
parses the output to identify probable tags.
Args:
state (dict): The current state of the graph, expected to contain 'user_input', 'url',
and optionally 'document' within 'keys'.
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns:
dict: The updated state with the 'tags' key containing a list of probable HTML tags.
dict: The updated state with the output key containing a list of probable HTML tags.
Raises:
KeyError: If 'user_input' or 'url' is not found in the state, indicating that the
KeyError: If input keys are not found in the state, indicating that the
necessary information for generating tag predictions is missing.
"""

View File

@ -1,45 +1,44 @@
"""
Module for the ImageToTextNode class.
ImageToTextNode Module
"""
from typing import List
from .base_node import BaseNode
class ImageToTextNode(BaseNode):
"""
A class representing a node that processes an image and returns the text description.
Retrieves an image from a URL and converts it to text using an ImageToText model.
Attributes:
llm_model (OpenAIImageToText): An instance of the OpenAIImageToText class.
llm_model: An instance of the language model client used for image-to-text conversion.
verbose (bool): A flag indicating whether to show print statements during execution.
Methods:
execute(state, url): Execute the node's logic and return the updated state.
Args:
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "ImageToText".
"""
def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "ImageToText"):
"""
Initializes an instance of the ImageToTextNode class.
Args:
input (str): The input for the node.
output (List[str]): The output of the node.
node_config (dict): Configuration for the model.
node_name (str): Name of the node.
"""
super().__init__(node_name, "node", input, output, 1, node_config)
self.llm_model = node_config["llm_model"]
self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state: dict) -> dict:
"""
Execute the node's logic and return the updated state.
Generate text from an image using an image-to-text model. The method retrieves the image
from the URL provided in the state.
Args:
state (dict): The current state of the graph.
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns:
dict: The updated state after executing this node.
dict: The updated state with the output key containing the text extracted from the image.
"""
if self.verbose:

View File

@ -1,6 +1,7 @@
"""
Module for parsing the HTML node
ParseNode Module
"""
from typing import List
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_transformers import Html2TextTransformer
@ -10,56 +11,40 @@ from .base_node import BaseNode
class ParseNode(BaseNode):
"""
A node responsible for parsing HTML content from a document.
It uses BeautifulSoupTransformer for parsing, providing flexibility in extracting
specific parts of an HTML document.
The parsed content is split into chunks for further processing.
This node enhances the scraping workflow by allowing for targeted extraction of
content, thereby optimizing the processing of large HTML documents.
Attributes:
node_name (str): The unique identifier name for the node, defaulting to "ParseHTMLNode".
node_type (str): The type of the node, set to "node" indicating a standard operational node.
verbose (bool): A flag indicating whether to show print statements during execution.
Args:
node_name (str, optional): The unique identifier name for the node.
Defaults to "ParseHTMLNode".
Methods:
execute(state): Parses the HTML document contained within the state using
the specified tags, if provided, and updates the state with the parsed content.
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "Parse".
"""
def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "Parse"):
"""
Initializes the ParseHTMLNode with a node name.
Args:
doc_type (str): type of the input document
chunks_size (int): size of the chunks to split the document
node_name (str): name of the node
node_type (str, optional): type of the node
"""
super().__init__(node_name, "node", input, output, 1, node_config)
self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Executes the node's logic to parse the HTML document based on specified tags.
If tags are provided in the state, the document is parsed accordingly; otherwise,
the document remains unchanged. The method updates the state with either the original
or parsed document under the 'parsed_document' key.
Executes the node's logic to parse the HTML document content and split it into chunks.
Args:
state (dict): The current state of the graph, expected to contain
'document' within 'keys', and optionally 'tags' for targeted parsing.
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns:
dict: The updated state with the 'parsed_document' key containing the parsed content,
if tags were provided, or the original document otherwise.
dict: The updated state with the output key containing the parsed content chunks.
Raises:
KeyError: If 'document' is not found in the state, indicating that the necessary
information for parsing is missing.
KeyError: If the input keys are not found in the state, indicating that the
necessary information for parsing the content is missing.
"""
if self.verbose:

View File

@ -1,5 +1,5 @@
"""
Module for parsing the HTML node
RAGNode Module
"""
from typing import List
@ -18,46 +18,44 @@ from .base_node import BaseNode
class RAGNode(BaseNode):
"""
A node responsible for compressing the input tokens and storing the document
in a vector database for retrieval.
in a vector database for retrieval. Relevant chunks are stored in the state.
It allows scraping of big documents without exceeding the token limit of the language model.
Attributes:
node_name (str): The unique identifier name for the node, defaulting to "ParseHTMLNode".
node_type (str): The type of the node, set to "node" indicating a standard operational node.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
Args:
node_name (str, optional): The unique identifier name for the node.
Defaults to "ParseHTMLNode".
Methods:
execute(state): Parses the HTML document contained within the state using
the specified tags, if provided, and updates the state with the parsed content.
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "RAG".
"""
def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "RAG"):
"""
Initializes the ParseHTMLNode with a node name.
"""
super().__init__(node_name, "node", input, output, 2, node_config)
self.llm_model = node_config["llm"]
self.embedder_model = node_config.get("embedder_model", None)
self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Executes the node's logic to implement RAG (Retrieval-Augmented Generation)
Executes the node's logic to implement RAG (Retrieval-Augmented Generation).
The method updates the state with relevant chunks of the document.
Args:
state (dict): The state containing the 'document' key with the HTML content
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns:
dict: The updated state containing the 'relevant_chunks' key with the relevant chunks.
dict: The updated state with the output key containing the relevant chunks of the document.
Raises:
KeyError: If 'document' is not found in the state, indicating that the necessary
information for parsing is missing.
KeyError: If the input keys are not found in the state, indicating that the
necessary information for compressing the content is missing.
"""
if self.verbose:

View File

@ -1,6 +1,7 @@
"""
Module for checking if a website is scrapepable or not
RobotsNode Module
"""
from typing import List
from urllib.parse import urlparse
from langchain_community.document_loaders import AsyncHtmlLoader
@ -12,75 +13,53 @@ from ..helpers import robots_dictionary
class RobotsNode(BaseNode):
"""
A node responsible for checking if a website is scrapepable or not.
It uses the AsyncHtmlLoader for asynchronous
document loading.
A node responsible for checking if a website is scrapeable or not based on the robots.txt file.
It uses a language model to determine if the website allows scraping of the provided path.
This node acts as a starting point in many scraping workflows, preparing the state
with the necessary HTML content for further processing by subsequent nodes in the graph.
Attributes:
This node acts as a starting point in many scraping workflows, preparing the state
with the necessary HTML content for further processing by subsequent nodes in the graph.
Attributes:
node_name (str): The unique identifier name for the node.
node_type (str): The type of the node, defaulting to "node". This categorization
helps in determining the node's role and behavior within the graph.
The "node" type is used for standard operational nodes.
llm_model: An instance of the language model client used for checking scrapeability.
force_scraping (bool): A flag indicating whether scraping should be enforced even
if disallowed by robots.txt.
verbose (bool): A flag indicating whether to show print statements during execution.
Args:
node_name (str): The unique identifier name for the node. This name is used to
reference the node within the graph.
node_type (str, optional): The type of the node, limited to "node" or
"conditional_node". Defaults to "node".
node_config (dict): Configuration parameters for the node.
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
force_scraping (bool): A flag indicating whether scraping should be enforced even
if disallowed by robots.txt. Defaults to True.
input (str): Input expression defining how to interpret the incoming data.
output (List[str]): List of output keys where the results will be stored.
Methods:
execute(state): Fetches the HTML content for the URL specified in the state and
updates the state with this content under the 'document' key.
The 'url' key must be present in the state for the operation
to succeed.
if disallowed by robots.txt. Defaults to True.
node_name (str): The unique identifier name for the node, defaulting to "Robots".
"""
def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True,
node_name: str = "Robots"):
"""
Initializes the RobotsNode with a node name, input/output expressions
and node configuration.
Arguments:
input (str): Input expression defining how to interpret the incoming data.
output (List[str]): List of output keys where the results will be stored.
node_config (dict): Configuration parameters for the node.
force_scraping (bool): A flag indicating whether scraping should be enforced even
if disallowed by robots.txt. Defaults to True.
node_name (str, optional): The unique identifier name for the node.
Defaults to "Robots".
"""
super().__init__(node_name, "node", input, output, 1)
self.llm_model = node_config["llm"]
self.force_scraping = force_scraping
self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Executes the node's logic to fetch HTML content from a specified URL and
update the state with this content.
Checks if a website is scrapeable based on the robots.txt file and updates the state
with the scrapeability status. The method constructs a prompt for the language model,
submits it, and parses the output to determine if scraping is allowed.
Args:
state (dict): The current state of the graph, expected to contain a 'url' key.
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns:
dict: The updated state with a new 'document' key containing the fetched HTML content.
dict: The updated state with the output key containing the scrapeability status.
Raises:
KeyError: If the 'url' key is not found in the state, indicating that the
necessary information to perform the operation is missing.
KeyError: If the input keys are not found in the state, indicating that the
necessary information for checking scrapeability is missing.
KeyError: If the large language model is not found in the robots_dictionary.
ValueError: If the website is not scrapeable based on the robots.txt file and
scraping is not enforced.
"""
if self.verbose:

View File

@ -1,6 +1,7 @@
"""
Module for generating the answer node
SearchInternetNode Module
"""
from typing import List
from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
@ -10,63 +11,46 @@ from .base_node import BaseNode
class SearchInternetNode(BaseNode):
"""
A node that generates an answer by querying a language model (LLM) based on the user's input
and the content extracted from a webpage. It constructs a prompt from the user's input
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
an answer.
A node that generates a search query based on the user's input and searches the internet
for relevant information. The node constructs a prompt for the language model, submits it,
and processes the output to generate a search query. It then uses the search query to find
relevant information on the internet and updates the state with the generated answer.
Attributes:
node_name (str): The unique identifier name for the node.
node_type (str): The type of the node, set to "node" indicating a standard operational node.
input (str): The user input used to construct the prompt.
output (List[str]): The keys in the state dictionary
where the generated answer will be stored.
model_config (dict): Configuration parameters for the language model client.
llm_model: An instance of the language model client used for generating search queries.
verbose (bool): A flag indicating whether to show print statements during execution.
Args:
input (str): The user input used to construct the prompt.
output (List[str]): The keys in the state dictionary where the
generated answer will be stored.
model_config (dict): Configuration parameters for the language model client.
node_name (str, optional): The unique identifier name for the node.
Methods:
execute(state): Processes the input and document from the state to generate an answer,
updating the state with the generated answer under the 'answer' key.
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "SearchInternet".
"""
def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "SearchInternet"):
"""
Initializes the SearchInternetNode with input, output, model configuration, and a node name.
Args:
input (str): The user input used to construct the prompt.
output (List[str]): The keys in the state dictionary where the
generated answer will be stored.
model_config (dict): Configuration parameters for the language model client.
node_name (str): The unique identifier name for the node.
"""
super().__init__(node_name, "node", input, output, 1, node_config)
self.llm_model = node_config["llm"]
self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Generates an answer by constructing a prompt from the user's input and the scraped
content, querying the language model, and parsing its response.
The method updates the state with the generated answer under the 'answer' key.
The method updates the state with the generated answer.
Args:
state (dict): The current state of the graph, expected to contain 'user_input',
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns:
dict: The updated state with the 'answer' key containing the generated answer.
dict: The updated state with the output key containing the generated answer.
Raises:
KeyError: If 'user_input' or 'document' is not found in the state, indicating
that the necessary information for generating an answer is missing.
KeyError: If the input keys are not found in the state, indicating that the
necessary information for generating the answer is missing.
"""
if self.verbose:

View File

@ -1,6 +1,7 @@
"""
Module for generating the answer node
SearchLinkNode Module
"""
# Imports from standard library
from typing import List
from tqdm import tqdm
@ -18,58 +19,42 @@ from .base_node import BaseNode
class SearchLinkNode(BaseNode):
"""
A node that generates an answer using a language model (LLM) based on the user's input
and the content extracted from a webpage. It constructs a prompt from the user's input
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
an answer.
A node that looks for all the links in a web page and returns them.
It first tries to extract the links using classical methods; if that fails, it uses the LLM to extract the links.
Attributes:
llm: An instance of a language model client, configured for generating answers.
node_name (str): The unique identifier name for the node, defaulting
to "GenerateAnswerNode".
node_type (str): The type of the node, set to "node" indicating a
standard operational node.
llm_model: An instance of the language model client used for generating answers.
verbose (bool): A flag indicating whether to show print statements during execution.
Args:
llm: An instance of the language model client (e.g., ChatOpenAI) used
for generating answers.
node_name (str, optional): The unique identifier name for the node.
Defaults to "GenerateAnswerNode".
Methods:
execute(state): Processes the input and document from the state to generate an answer,
updating the state with the generated answer under the 'answer' key.
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "GenerateLinks".
"""
def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "GenerateLinks"):
"""
Initializes the GenerateAnswerNode with a language model client and a node name.
Args:
llm: An instance of the OpenAIImageToText class.
node_name (str): name of the node
"""
super().__init__(node_name, "node", input, output, 1, node_config)
self.llm_model = node_config["llm"]
self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Generates an answer by constructing a prompt from the user's input and the scraped
content, querying the language model, and parsing its response.
The method updates the state with the generated answer under the 'answer' key.
Generates a list of links by extracting them from the provided HTML content.
First, it tries to extract the links using classical methods; if that fails, it uses the LLM to extract the links.
Args:
state (dict): The current state of the graph, expected to contain 'user_input',
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns:
dict: The updated state with the 'answer' key containing the generated answer.
dict: The updated state with the output key containing the list of links.
Raises:
KeyError: If 'user_input' or 'document' is not found in the state, indicating
that the necessary information for generating an answer is missing.
KeyError: If the input keys are not found in the state, indicating that the
necessary information for generating the answer is missing.
"""
if self.verbose:
@ -90,7 +75,7 @@ class SearchLinkNode(BaseNode):
except Exception as e:
if self.verbose:
print("error on using classical methods. Using LLM for getting the links")
print("Error extracting links using classical methods. Using LLM to extract links.")
output_parser = JsonOutputParser()

View File

@ -1,39 +1,47 @@
"""
TextToSpeechNode Module
"""
"""
Module for converting text to speech
"""
from typing import List
from .base_node import BaseNode
class TextToSpeechNode(BaseNode):
"""
A class representing a node that processes text and returns the voice.
Converts text to speech using the specified text-to-speech model.
Attributes:
llm (OpenAITextToSpeech): An instance of the OpenAITextToSpeech class.
tts_model: An instance of the text-to-speech model client.
verbose (bool): A flag indicating whether to show print statements during execution.
Methods:
execute(state, text): Execute the node's logic and return the updated state.
Args:
input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "TextToSpeech".
"""
def __init__(self, input: str, output: List[str],
node_config: dict, node_name: str = "TextToSpeech"):
"""
Initializes an instance of the TextToSpeechNode class.
"""
super().__init__(node_name, "node", input, output, 1, node_config)
self.tts_model = node_config["tts_model"]
self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state):
def execute(self, state: dict) -> dict:
"""
Execute the node's logic and return the updated state.
Args:
state (dict): The current state of the graph.
text (str): The text to convert to speech.
Converts text to speech using the specified text-to-speech model.
:return: The updated state after executing this node.
Args:
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns:
dict: The updated state with the output key containing the audio generated from the text.
Raises:
KeyError: If the input keys are not found in the state, indicating that the
necessary information for generating the audio is missing.
"""
if self.verbose: