docs: refactor nodes docstrings

This commit is contained in:
Marco Perini 2024-05-01 23:17:57 +02:00
parent e9817963c8
commit 1409797475
12 changed files with 192 additions and 291 deletions

View File

@ -1,5 +1,5 @@
""" """
Module for defining BaseNode, an abstract base class for nodes in a graph-based workflow. BaseNode Module
""" """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod

View File

@ -1,5 +1,5 @@
""" """
Module for fetching the HTML node FetchNode Module
""" """
from typing import List, Optional from typing import List, Optional
@ -27,10 +27,6 @@ class FetchNode(BaseNode):
output (List[str]): List of output keys to be updated in the state. output (List[str]): List of output keys to be updated in the state.
node_config (Optional[dict]): Additional configuration for the node. node_config (Optional[dict]): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "Fetch". node_name (str): The unique identifier name for the node, defaulting to "Fetch".
Methods:
execute(state): Fetches the HTML content for the URL specified in the state
and updates the state with the fetched content under the specified output key.
""" """
def __init__(self, input: str, output: List[str], node_config: Optional[dict], node_name: str = "Fetch"): def __init__(self, input: str, output: List[str], node_config: Optional[dict], node_name: str = "Fetch"):
@ -45,13 +41,14 @@ class FetchNode(BaseNode):
update the state with this content. update the state with this content.
Args: Args:
state (dict): The current state of the graph, expected to contain a 'url' key. state (dict): The current state of the graph. The input keys will be used
to fetch the correct data types from the state.
Returns: Returns:
dict: The updated state with a new 'document' key containing the fetched HTML content. dict: The updated state with a new output key containing the fetched HTML content.
Raises: Raises:
KeyError: If the 'url' key is not found in the state, indicating that the KeyError: If the input key is not found in the state, indicating that the
necessary information to perform the operation is missing. necessary information to perform the operation is missing.
""" """
if self.verbose: if self.verbose:

View File

@ -1,6 +1,7 @@
""" """
Module for generating the answer node GenerateAnswerNode Module
""" """
# Imports from standard library # Imports from standard library
from typing import List from typing import List
from tqdm import tqdm from tqdm import tqdm
@ -16,57 +17,43 @@ from .base_node import BaseNode
class GenerateAnswerNode(BaseNode): class GenerateAnswerNode(BaseNode):
""" """
A node that generates an answer using a language model (LLM) based on the user's input A node that generates an answer using a large language model (LLM) based on the user's input
and the content extracted from a webpage. It constructs a prompt from the user's input and the content extracted from a webpage. It constructs a prompt from the user's input
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
an answer. an answer.
Attributes: Attributes:
llm: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
node_name (str): The unique identifier name for the node, defaulting verbose (bool): A flag indicating whether to show print statements during execution.
to "GenerateAnswerNode".
node_type (str): The type of the node, set to "node" indicating a
standard operational node.
Args: Args:
llm: An instance of the language model client (e.g., ChatOpenAI) used input (str): Boolean expression defining the input keys needed from the state.
for generating answers. output (List[str]): List of output keys to be updated in the state.
node_name (str, optional): The unique identifier name for the node. node_config (dict): Additional configuration for the node.
Defaults to "GenerateAnswerNode". node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswer".
Methods:
execute(state): Processes the input and document from the state to generate an answer,
updating the state with the generated answer under the 'answer' key.
""" """
def __init__(self, input: str, output: List[str], node_config: dict, def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "GenerateAnswer"): node_name: str = "GenerateAnswer"):
"""
Initializes the GenerateAnswerNode with a language model client and a node name.
Args:
llm: An instance of the OpenAIImageToText class.
node_name (str): name of the node
"""
super().__init__(node_name, "node", input, output, 2, node_config) super().__init__(node_name, "node", input, output, 2, node_config)
self.llm_model = node_config["llm"] self.llm_model = node_config["llm"]
self.verbose = True if node_config is None else node_config.get("verbose", False) self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Generates an answer by constructing a prompt from the user's input and the scraped Generates an answer by constructing a prompt from the user's input and the scraped
content, querying the language model, and parsing its response. content, querying the language model, and parsing its response.
The method updates the state with the generated answer under the 'answer' key.
Args: Args:
state (dict): The current state of the graph, expected to contain 'user_input', state (dict): The current state of the graph. The input keys will be used
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'. to fetch the correct data from the state.
Returns: Returns:
dict: The updated state with the 'answer' key containing the generated answer. dict: The updated state with the output key containing the generated answer.
Raises: Raises:
KeyError: If 'user_input' or 'document' is not found in the state, indicating KeyError: If the input keys are not found in the state, indicating
that the necessary information for generating an answer is missing. that the necessary information for generating an answer is missing.
""" """

View File

@ -1,6 +1,7 @@
""" """
Module for generating the answer node GenerateScraperNode Module
""" """
# Imports from standard library # Imports from standard library
from typing import List from typing import List
from tqdm import tqdm from tqdm import tqdm
@ -16,58 +17,46 @@ from .base_node import BaseNode
class GenerateScraperNode(BaseNode): class GenerateScraperNode(BaseNode):
""" """
A node that generates an answer using a language model (LLM) based on the user's input Generates a python script for scraping a website using the specified library.
and the content extracted from a webpage. It constructs a prompt from the user's input It takes the user's prompt and the scraped content as input and generates a python script
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce that extracts the information requested by the user.
an answer.
Attributes: Attributes:
llm: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
node_name (str): The unique identifier name for the node, defaulting library (str): The python library to use for scraping the website.
to "GenerateScraperNode". source (str): The website to scrape.
node_type (str): The type of the node, set to "node" indicating a
standard operational node.
Args: Args:
llm: An instance of the language model client (e.g., ChatOpenAI) used input (str): Boolean expression defining the input keys needed from the state.
for generating answers. output (List[str]): List of output keys to be updated in the state.
node_name (str, optional): The unique identifier name for the node. node_config (dict): Additional configuration for the node.
Defaults to "GenerateScraperNode". library (str): The python library to use for scraping the website.
website (str): The website to scrape.
node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswer".
Methods:
execute(state): Processes the input and document from the state to generate an answer,
updating the state with the generated answer under the 'answer' key.
""" """
def __init__(self, input: str, output: List[str], node_config: dict, def __init__(self, input: str, output: List[str], node_config: dict,
library: str, website: str, node_name: str = "GenerateAnswer"): library: str, website: str, node_name: str = "GenerateAnswer"):
"""
Initializes the GenerateScraperNode with a language model client and a node name.
Args:
llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
node_name (str): name of the node
"""
super().__init__(node_name, "node", input, output, 2, node_config) super().__init__(node_name, "node", input, output, 2, node_config)
self.llm_model = node_config["llm"] self.llm_model = node_config["llm"]
self.library = library self.library = library
self.source = website self.source = website
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Generates an answer by constructing a prompt from the user's input and the scraped Generates a python script for scraping a website using the specified library.
content, querying the language model, and parsing its response.
The method updates the state with the generated answer under the 'answer' key.
Args: Args:
state (dict): The current state of the graph, expected to contain 'user_input', state (dict): The current state of the graph. The input keys will be used
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'. to fetch the correct data from the state.
Returns: Returns:
dict: The updated state with the 'answer' key containing the generated answer. dict: The updated state with the output key containing the generated python script.
Raises: Raises:
KeyError: If 'user_input' or 'document' is not found in the state, indicating KeyError: If input keys are not found in the state, indicating
that the necessary information for generating an answer is missing. that the necessary information for generating an answer is missing.
""" """

View File

@ -1,6 +1,7 @@
""" """
Module for proobable tags GetProbableTagsNode Module
""" """
from typing import List from typing import List
from langchain.output_parsers import CommaSeparatedListOutputParser from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate from langchain.prompts import PromptTemplate
@ -15,47 +16,36 @@ class GetProbableTagsNode(BaseNode):
list of probable tags. list of probable tags.
Attributes: Attributes:
llm: An instance of a language model client, configured for generating tag predictions. llm_model: An instance of the language model client used for tag predictions.
node_name (str): The unique identifier name for the node,
defaulting to "GetProbableTagsNode".
node_type (str): The type of the node, set to "node" indicating a standard operational node.
Args: Args:
llm: An instance of the language model client (e.g., ChatOpenAI) used for tag predictions. input (str): Boolean expression defining the input keys needed from the state.
node_name (str, optional): The unique identifier name for the node. output (List[str]): List of output keys to be updated in the state.
Defaults to "GetProbableTagsNode". model_config (dict): Additional configuration for the language model.
node_name (str): The unique identifier name for the node, defaulting to "GetProbableTags".
Methods:
execute(state): Processes the user's input and the URL from the state to generate a list of
probable HTML tags, updating the state with these tags under the 'tags' key.
""" """
def __init__(self, input: str, output: List[str], model_config: dict, def __init__(self, input: str, output: List[str], model_config: dict,
node_name: str = "GetProbableTags"): node_name: str = "GetProbableTags"):
"""
Initializes the GetProbableTagsNode with a language model client and a node name.
Args:
llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
node_name (str): name of the node
"""
super().__init__(node_name, "node", input, output, 2, model_config) super().__init__(node_name, "node", input, output, 2, model_config)
self.llm_model = model_config["llm_model"] self.llm_model = model_config["llm_model"]
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Generates a list of probable HTML tags based on the user's input and updates the state Generates a list of probable HTML tags based on the user's input and updates the state
with this list. The method constructs a prompt for the language model, submits it, and with this list. The method constructs a prompt for the language model, submits it, and
parses the output to identify probable tags. parses the output to identify probable tags.
Args: Args:
state (dict): The current state of the graph, expected to contain 'user_input', 'url', state (dict): The current state of the graph. The input keys will be used to fetch the
and optionally 'document' within 'keys'. correct data types from the state.
Returns: Returns:
dict: The updated state with the 'tags' key containing a list of probable HTML tags. dict: The updated state with the output key containing a list of probable HTML tags.
Raises: Raises:
KeyError: If 'user_input' or 'url' is not found in the state, indicating that the KeyError: If input keys are not found in the state, indicating that the
necessary information for generating tag predictions is missing. necessary information for generating tag predictions is missing.
""" """

View File

@ -1,45 +1,44 @@
""" """
Module for the ImageToTextNode class. ImageToTextNode Module
""" """
from typing import List from typing import List
from .base_node import BaseNode from .base_node import BaseNode
class ImageToTextNode(BaseNode): class ImageToTextNode(BaseNode):
""" """
A class representing a node that processes an image and returns the text description. Retrieve an image from a URL and convert it to text using an ImageToText model.
Attributes: Attributes:
llm_model (OpenAIImageToText): An instance of the OpenAIImageToText class. llm_model: An instance of the language model client used for image-to-text conversion.
verbose (bool): A flag indicating whether to show print statements during execution.
Methods: Args:
execute(state, url): Execute the node's logic and return the updated state. input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "ImageToText".
""" """
def __init__(self, input: str, output: List[str], node_config: dict, def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "ImageToText"): node_name: str = "ImageToText"):
"""
Initializes an instance of the ImageToTextNode class.
Args:
input (str): The input for the node.
output (List[str]): The output of the node.
node_config (dict): Configuration for the model.
node_name (str): Name of the node.
"""
super().__init__(node_name, "node", input, output, 1, node_config) super().__init__(node_name, "node", input, output, 1, node_config)
self.llm_model = node_config["llm_model"] self.llm_model = node_config["llm_model"]
self.verbose = True if node_config is None else node_config.get("verbose", False) self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state: dict) -> dict: def execute(self, state: dict) -> dict:
""" """
Execute the node's logic and return the updated state. Generate text from an image using an image-to-text model. The method retrieves the image
from the URL provided in the state.
Args: Args:
state (dict): The current state of the graph. state (dict): The current state of the graph. The input keys will be used to fetch the
correct data types from the state.
Returns: Returns:
dict: The updated state after executing this node. dict: The updated state with the output key containing the text extracted from the image.
""" """
if self.verbose: if self.verbose:

View File

@ -1,6 +1,7 @@
""" """
Module for parsing the HTML node ParseNode Module
""" """
from typing import List from typing import List
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_transformers import Html2TextTransformer from langchain_community.document_transformers import Html2TextTransformer
@ -10,56 +11,40 @@ from .base_node import BaseNode
class ParseNode(BaseNode): class ParseNode(BaseNode):
""" """
A node responsible for parsing HTML content from a document. A node responsible for parsing HTML content from a document.
It uses BeautifulSoupTransformer for parsing, providing flexibility in extracting The parsed content is split into chunks for further processing.
specific parts of an HTML document.
This node enhances the scraping workflow by allowing for targeted extraction of This node enhances the scraping workflow by allowing for targeted extraction of
content, thereby optimizing the processing of large HTML documents. content, thereby optimizing the processing of large HTML documents.
Attributes: Attributes:
node_name (str): The unique identifier name for the node, defaulting to "ParseHTMLNode". verbose (bool): A flag indicating whether to show print statements during execution.
node_type (str): The type of the node, set to "node" indicating a standard operational node.
Args: Args:
node_name (str, optional): The unique identifier name for the node. input (str): Boolean expression defining the input keys needed from the state.
Defaults to "ParseHTMLNode". output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
Methods: node_name (str): The unique identifier name for the node, defaulting to "Parse".
execute(state): Parses the HTML document contained within the state using
the specified tags, if provided, and updates the state with the parsed content.
""" """
def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "Parse"): def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "Parse"):
"""
Initializes the ParseHTMLNode with a node name.
Args:
doc_type (str): type of the input document
chunks_size (int): size of the chunks to split the document
node_name (str): name of the node
node_type (str, optional): type of the node
"""
super().__init__(node_name, "node", input, output, 1, node_config) super().__init__(node_name, "node", input, output, 1, node_config)
self.verbose = True if node_config is None else node_config.get("verbose", False) self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Executes the node's logic to parse the HTML document based on specified tags. Executes the node's logic to parse the HTML document content and split it into chunks.
If tags are provided in the state, the document is parsed accordingly; otherwise,
the document remains unchanged. The method updates the state with either the original
or parsed document under the 'parsed_document' key.
Args: Args:
state (dict): The current state of the graph, expected to contain state (dict): The current state of the graph. The input keys will be used to fetch the
'document' within 'keys', and optionally 'tags' for targeted parsing. correct data from the state.
Returns: Returns:
dict: The updated state with the 'parsed_document' key containing the parsed content, dict: The updated state with the output key containing the parsed content chunks.
if tags were provided, or the original document otherwise.
Raises: Raises:
KeyError: If 'document' is not found in the state, indicating that the necessary KeyError: If the input keys are not found in the state, indicating that the
information for parsing is missing. necessary information for parsing the content is missing.
""" """
if self.verbose: if self.verbose:

View File

@ -1,5 +1,5 @@
""" """
Module for parsing the HTML node RAGNode Module
""" """
from typing import List from typing import List
@ -18,46 +18,44 @@ from .base_node import BaseNode
class RAGNode(BaseNode): class RAGNode(BaseNode):
""" """
A node responsible for compressing the input tokens and storing the document A node responsible for compressing the input tokens and storing the document
in a vector database for retrieval. in a vector database for retrieval. Relevant chunks are stored in the state.
It allows scraping of big documents without exceeding the token limit of the language model. It allows scraping of big documents without exceeding the token limit of the language model.
Attributes: Attributes:
node_name (str): The unique identifier name for the node, defaulting to "ParseHTMLNode". llm_model: An instance of a language model client, configured for generating answers.
node_type (str): The type of the node, set to "node" indicating a standard operational node. embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
Args: Args:
node_name (str, optional): The unique identifier name for the node. input (str): Boolean expression defining the input keys needed from the state.
Defaults to "ParseHTMLNode". output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
Methods: node_name (str): The unique identifier name for the node, defaulting to "RAG".
execute(state): Parses the HTML document contained within the state using
the specified tags, if provided, and updates the state with the parsed content.
""" """
def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "RAG"): def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "RAG"):
"""
Initializes the ParseHTMLNode with a node name.
"""
super().__init__(node_name, "node", input, output, 2, node_config) super().__init__(node_name, "node", input, output, 2, node_config)
self.llm_model = node_config["llm"] self.llm_model = node_config["llm"]
self.embedder_model = node_config.get("embedder_model", None) self.embedder_model = node_config.get("embedder_model", None)
self.verbose = True if node_config is None else node_config.get("verbose", False) self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Executes the node's logic to implement RAG (Retrieval-Augmented Generation) Executes the node's logic to implement RAG (Retrieval-Augmented Generation).
The method updates the state with relevant chunks of the document. The method updates the state with relevant chunks of the document.
Args: Args:
state (dict): The state containing the 'document' key with the HTML content state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns: Returns:
dict: The updated state containing the 'relevant_chunks' key with the relevant chunks. dict: The updated state with the output key containing the relevant chunks of the document.
Raises: Raises:
KeyError: If 'document' is not found in the state, indicating that the necessary KeyError: If the input keys are not found in the state, indicating that the
information for parsing is missing. necessary information for compressing the content is missing.
""" """
if self.verbose: if self.verbose:

View File

@ -1,6 +1,7 @@
""" """
Module for checking if a website is scrapepable or not RobotsNode Module
""" """
from typing import List from typing import List
from urllib.parse import urlparse from urllib.parse import urlparse
from langchain_community.document_loaders import AsyncHtmlLoader from langchain_community.document_loaders import AsyncHtmlLoader
@ -12,75 +13,53 @@ from ..helpers import robots_dictionary
class RobotsNode(BaseNode): class RobotsNode(BaseNode):
""" """
A node responsible for checking if a website is scrapepable or not. A node responsible for checking if a website is scrapeable or not based on the robots.txt file.
It uses the AsyncHtmlLoader for asynchronous It uses a language model to determine if the website allows scraping of the provided path.
document loading.
This node acts as a starting point in many scraping workflows, preparing the state This node acts as a starting point in many scraping workflows, preparing the state
with the necessary HTML content for further processing by subsequent nodes in the graph. with the necessary HTML content for further processing by subsequent nodes in the graph.
Attributes: Attributes:
This node acts as a starting point in many scraping workflows, preparing the state llm_model: An instance of the language model client used for checking scrapeability.
with the necessary HTML content for further processing by subsequent nodes in the graph. force_scraping (bool): A flag indicating whether scraping should be enforced even
if disallowed by robots.txt.
Attributes: verbose (bool): A flag indicating whether to show print statements during execution.
node_name (str): The unique identifier name for the node.
node_type (str): The type of the node, defaulting to "node". This categorization
helps in determining the node's role and behavior within the graph.
The "node" type is used for standard operational nodes.
Args: Args:
node_name (str): The unique identifier name for the node. This name is used to input (str): Boolean expression defining the input keys needed from the state.
reference the node within the graph. output (List[str]): List of output keys to be updated in the state.
node_type (str, optional): The type of the node, limited to "node" or node_config (dict): Additional configuration for the node.
"conditional_node". Defaults to "node".
node_config (dict): Configuration parameters for the node.
force_scraping (bool): A flag indicating whether scraping should be enforced even force_scraping (bool): A flag indicating whether scraping should be enforced even
if disallowed by robots.txt. Defaults to True. if disallowed by robots.txt. Defaults to True.
input (str): Input expression defining how to interpret the incoming data. node_name (str): The unique identifier name for the node, defaulting to "Robots".
output (List[str]): List of output keys where the results will be stored.
Methods:
execute(state): Fetches the HTML content for the URL specified in the state and
updates the state with this content under the 'document' key.
The 'url' key must be present in the state for the operation
to succeed.
""" """
def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True, def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True,
node_name: str = "Robots"): node_name: str = "Robots"):
"""
Initializes the RobotsNode with a node name, input/output expressions
and node configuration.
Arguments:
input (str): Input expression defining how to interpret the incoming data.
output (List[str]): List of output keys where the results will be stored.
node_config (dict): Configuration parameters for the node.
force_scraping (bool): A flag indicating whether scraping should be enforced even
if disallowed by robots.txt. Defaults to True.
node_name (str, optional): The unique identifier name for the node.
Defaults to "Robots".
"""
super().__init__(node_name, "node", input, output, 1) super().__init__(node_name, "node", input, output, 1)
self.llm_model = node_config["llm"] self.llm_model = node_config["llm"]
self.force_scraping = force_scraping self.force_scraping = force_scraping
self.verbose = True if node_config is None else node_config.get("verbose", False) self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Executes the node's logic to fetch HTML content from a specified URL and Checks if a website is scrapeable based on the robots.txt file and updates the state
update the state with this content. with the scrapeability status. The method constructs a prompt for the language model,
submits it, and parses the output to determine if scraping is allowed.
Args: Args:
state (dict): The current state of the graph, expected to contain a 'url' key. state (dict): The current state of the graph. The input keys will be used to fetch the
correct data from the state.
Returns: Returns:
dict: The updated state with a new 'document' key containing the fetched HTML content. dict: The updated state with the output key containing the scrapeability status.
Raises: Raises:
KeyError: If the 'url' key is not found in the state, indicating that the KeyError: If the input keys are not found in the state, indicating that the
necessary information to perform the operation is missing. necessary information for checking scrapeability is missing.
KeyError: If the large language model is not found in the robots_dictionary.
ValueError: If the website is not scrapeable based on the robots.txt file and
scraping is not enforced.
""" """
if self.verbose: if self.verbose:

View File

@ -1,6 +1,7 @@
""" """
Module for generating the answer node SearchInternetNode Module
""" """
from typing import List from typing import List
from langchain.output_parsers import CommaSeparatedListOutputParser from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate from langchain.prompts import PromptTemplate
@ -10,63 +11,46 @@ from .base_node import BaseNode
class SearchInternetNode(BaseNode): class SearchInternetNode(BaseNode):
""" """
A node that generates an answer by querying a language model (LLM) based on the user's input A node that generates a search query based on the user's input and searches the internet
and the content extracted from a webpage. It constructs a prompt from the user's input for relevant information. The node constructs a prompt for the language model, submits it,
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce and processes the output to generate a search query. It then uses the search query to find
an answer. relevant information on the internet and updates the state with the generated answer.
Attributes: Attributes:
node_name (str): The unique identifier name for the node. llm_model: An instance of the language model client used for generating search queries.
node_type (str): The type of the node, set to "node" indicating a standard operational node. verbose (bool): A flag indicating whether to show print statements during execution.
input (str): The user input used to construct the prompt.
output (List[str]): The keys in the state dictionary
where the generated answer will be stored.
model_config (dict): Configuration parameters for the language model client.
Args: Args:
input (str): The user input used to construct the prompt. input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): The keys in the state dictionary where the output (List[str]): List of output keys to be updated in the state.
generated answer will be stored. node_config (dict): Additional configuration for the node.
model_config (dict): Configuration parameters for the language model client. node_name (str): The unique identifier name for the node, defaulting to "SearchInternet".
node_name (str, optional): The unique identifier name for the node.
Methods:
execute(state): Processes the input and document from the state to generate an answer,
updating the state with the generated answer under the 'answer' key.
""" """
def __init__(self, input: str, output: List[str], node_config: dict, def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "SearchInternet"): node_name: str = "SearchInternet"):
"""
Initializes the SearchInternetNode with input, output, model configuration, and a node name.
Args:
input (str): The user input used to construct the prompt.
output (List[str]): The keys in the state dictionary where the
generated answer will be stored.
model_config (dict): Configuration parameters for the language model client.
node_name (str): The unique identifier name for the node.
"""
super().__init__(node_name, "node", input, output, 1, node_config) super().__init__(node_name, "node", input, output, 1, node_config)
self.llm_model = node_config["llm"] self.llm_model = node_config["llm"]
self.verbose = True if node_config is None else node_config.get("verbose", False) self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Generates an answer by constructing a prompt from the user's input and the scraped Generates an answer by constructing a prompt from the user's input and the scraped
content, querying the language model, and parsing its response. content, querying the language model, and parsing its response.
The method updates the state with the generated answer under the 'answer' key. The method updates the state with the generated answer.
Args: Args:
state (dict): The current state of the graph, expected to contain 'user_input', state (dict): The current state of the graph. The input keys will be used to fetch the
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'. correct data types from the state.
Returns: Returns:
dict: The updated state with the 'answer' key containing the generated answer. dict: The updated state with the output key containing the generated answer.
Raises: Raises:
KeyError: If 'user_input' or 'document' is not found in the state, indicating KeyError: If the input keys are not found in the state, indicating that the
that the necessary information for generating an answer is missing. necessary information for generating the answer is missing.
""" """
if self.verbose: if self.verbose:

View File

@ -1,6 +1,7 @@
""" """
Module for generating the answer node SearchLinkNode Module
""" """
# Imports from standard library # Imports from standard library
from typing import List from typing import List
from tqdm import tqdm from tqdm import tqdm
@ -18,58 +19,42 @@ from .base_node import BaseNode
class SearchLinkNode(BaseNode): class SearchLinkNode(BaseNode):
""" """
A node that generates an answer using a language model (LLM) based on the user's input A node that looks for all the links in a web page and returns them.
and the content extracted from a webpage. It constructs a prompt from the user's input It first tries to extract the links using classical methods; if that fails, it uses the LLM to extract the links.
and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
an answer.
Attributes: Attributes:
llm: An instance of a language model client, configured for generating answers. llm_model: An instance of the language model client used for extracting links when classical methods fail.
node_name (str): The unique identifier name for the node, defaulting verbose (bool): A flag indicating whether to show print statements during execution.
to "GenerateAnswerNode".
node_type (str): The type of the node, set to "node" indicating a
standard operational node.
Args: Args:
llm: An instance of the language model client (e.g., ChatOpenAI) used input (str): Boolean expression defining the input keys needed from the state.
for generating answers. output (List[str]): List of output keys to be updated in the state.
node_name (str, optional): The unique identifier name for the node. node_config (dict): Additional configuration for the node.
Defaults to "GenerateAnswerNode". node_name (str): The unique identifier name for the node, defaulting to "GenerateLinks".
Methods:
execute(state): Processes the input and document from the state to generate an answer,
updating the state with the generated answer under the 'answer' key.
""" """
def __init__(self, input: str, output: List[str], node_config: dict, def __init__(self, input: str, output: List[str], node_config: dict,
node_name: str = "GenerateLinks"): node_name: str = "GenerateLinks"):
"""
Initializes the GenerateAnswerNode with a language model client and a node name.
Args:
llm: An instance of the OpenAIImageToText class.
node_name (str): name of the node
"""
super().__init__(node_name, "node", input, output, 1, node_config) super().__init__(node_name, "node", input, output, 1, node_config)
self.llm_model = node_config["llm"] self.llm_model = node_config["llm"]
self.verbose = True if node_config is None else node_config.get("verbose", False) self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Generates an answer by constructing a prompt from the user's input and the scraped Generates a list of links by extracting them from the provided HTML content.
content, querying the language model, and parsing its response. First, it tries to extract the links using classical methods; if that fails, it uses the LLM to extract the links.
The method updates the state with the generated answer under the 'answer' key.
Args: Args:
state (dict): The current state of the graph, expected to contain 'user_input', state (dict): The current state of the graph. The input keys will be used to fetch the
and optionally 'parsed_document' or 'relevant_chunks' within 'keys'. correct data types from the state.
Returns: Returns:
dict: The updated state with the 'answer' key containing the generated answer. dict: The updated state with the output key containing the list of links.
Raises: Raises:
KeyError: If 'user_input' or 'document' is not found in the state, indicating KeyError: If the input keys are not found in the state, indicating that the
that the necessary information for generating an answer is missing. necessary information for extracting the links is missing.
""" """
if self.verbose: if self.verbose:
@ -90,7 +75,7 @@ class SearchLinkNode(BaseNode):
except Exception as e: except Exception as e:
if self.verbose: if self.verbose:
print("error on using classical methods. Using LLM for getting the links") print("Error extracting links using classical methods. Using LLM to extract links.")
output_parser = JsonOutputParser() output_parser = JsonOutputParser()

View File

@ -1,39 +1,47 @@
"""
TextToSpeechNode Module
"""
"""
Module for parsing the text to voice
"""
from typing import List from typing import List
from .base_node import BaseNode from .base_node import BaseNode
class TextToSpeechNode(BaseNode): class TextToSpeechNode(BaseNode):
""" """
A class representing a node that processes text and returns the voice. Converts text to speech using the specified text-to-speech model.
Attributes: Attributes:
llm (OpenAITextToSpeech): An instance of the OpenAITextToSpeech class. tts_model: An instance of the text-to-speech model client.
verbose (bool): A flag indicating whether to show print statements during execution.
Methods: Args:
execute(state, text): Execute the node's logic and return the updated state. input (str): Boolean expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
node_config (dict): Additional configuration for the node.
node_name (str): The unique identifier name for the node, defaulting to "TextToSpeech".
""" """
def __init__(self, input: str, output: List[str], def __init__(self, input: str, output: List[str],
node_config: dict, node_name: str = "TextToSpeech"): node_config: dict, node_name: str = "TextToSpeech"):
"""
Initializes an instance of the TextToSpeechNode class.
"""
super().__init__(node_name, "node", input, output, 1, node_config) super().__init__(node_name, "node", input, output, 1, node_config)
self.tts_model = node_config["tts_model"] self.tts_model = node_config["tts_model"]
self.verbose = True if node_config is None else node_config.get("verbose", False) self.verbose = True if node_config is None else node_config.get("verbose", False)
def execute(self, state): def execute(self, state: dict) -> dict:
""" """
Execute the node's logic and return the updated state. Converts text to speech using the specified text-to-speech model.
Args:
state (dict): The current state of the graph.
text (str): The text to convert to speech.
:return: The updated state after executing this node. Args:
state (dict): The current state of the graph. The input keys will be used to fetch the
correct data types from the state.
Returns:
dict: The updated state with the output key containing the audio generated from the text.
Raises:
KeyError: If the input keys are not found in the state, indicating that the
necessary information for generating the audio is missing.
""" """
if self.verbose: if self.verbose: