docs: refactor nodes docstrings

2026-06-25 21:11:11 +08:00 · 2024-05-01 23:17:57 +02:00 · 2024-05-01 23:17:57 +02:00 · 1409797475
commit 1409797475
parent e9817963c8
12 changed files with 192 additions and 291 deletions
--- a/scrapegraphai/nodes/base_node.py
+++ b/scrapegraphai/nodes/base_node.py
@ -1,5 +1,5 @@
 """ 
-Module for defining BaseNode, an abstract base class for nodes in a graph-based workflow.
+BaseNode Module
 """
 from abc import ABC, abstractmethod
--- a/scrapegraphai/nodes/fetch_node.py
+++ b/scrapegraphai/nodes/fetch_node.py
@ -1,5 +1,5 @@
 """ 
-Module for fetching the HTML node
+FetchNode Module
 """
 from typing import List, Optional
@ -27,10 +27,6 @@ class FetchNode(BaseNode):
        output (List[str]): List of output keys to be updated in the state.
        node_config (Optional[dict]): Additional configuration for the node.
        node_name (str): The unique identifier name for the node, defaulting to "Fetch".
    Methods:
        execute(state): Fetches the HTML content for the URL specified in the state
        and updates the state with the fetched content under the specified output key.
    """
    def __init__(self, input: str, output: List[str], node_config: Optional[dict], node_name: str = "Fetch"):
@ -45,13 +41,14 @@ class FetchNode(BaseNode):
        update the state with this content.
        Args:
-            state (dict): The current state of the graph, expected to contain a 'url' key.
+            state (dict): The current state of the graph. The input keys will be used
                            to fetch the correct data types from the state.
        Returns:
-            dict: The updated state with a new 'document' key containing the fetched HTML content.
+            dict: The updated state with a new output key containing the fetched HTML content.
        Raises:
-            KeyError: If the 'url' key is not found in the state, indicating that the
+            KeyError: If the input key is not found in the state, indicating that the
                    necessary information to perform the operation is missing.
        """
        if self.verbose:
--- a/scrapegraphai/nodes/generate_answer_node.py
+++ b/scrapegraphai/nodes/generate_answer_node.py
@ -1,6 +1,7 @@
 """
-Module for generating the answer node
+GenerateAnswerNode Module
 """
 # Imports from standard library
 from typing import List
 from tqdm import tqdm
@ -16,57 +17,43 @@ from .base_node import BaseNode
 class GenerateAnswerNode(BaseNode):
    """
-    A node that generates an answer using a language model (LLM) based on the user's input
+    A node that generates an answer using a large language model (LLM) based on the user's input
    and the content extracted from a webpage. It constructs a prompt from the user's input
    and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
    an answer.
    Attributes:
-        llm: An instance of a language model client, configured for generating answers.
+        llm_model: An instance of a language model client, configured for generating answers.
-        node_name (str): The unique identifier name for the node, defaulting 
+        verbose (bool): A flag indicating whether to show print statements during execution.
        to "GenerateAnswerNode".
        node_type (str): The type of the node, set to "node" indicating a 
        standard operational node.
    Args:
-        llm: An instance of the language model client (e.g., ChatOpenAI) used 
+        input (str): Boolean expression defining the input keys needed from the state.
-        for generating answers.
+        output (List[str]): List of output keys to be updated in the state.
-        node_name (str, optional): The unique identifier name for the node. 
+        node_config (dict): Additional configuration for the node.
-        Defaults to "GenerateAnswerNode".
+        node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswer".
    Methods:
        execute(state): Processes the input and document from the state to generate an answer,
                        updating the state with the generated answer under the 'answer' key.
    """
    def __init__(self, input: str, output: List[str], node_config: dict,
                 node_name: str = "GenerateAnswer"):
        """
        Initializes the GenerateAnswerNode with a language model client and a node name.
        Args:
            llm: An instance of the OpenAIImageToText class.
            node_name (str): name of the node
        """
        super().__init__(node_name, "node", input, output, 2, node_config)
        self.llm_model = node_config["llm"]
        self.verbose = True if node_config is None else node_config.get("verbose", False)
-    def execute(self, state):
+    def execute(self, state: dict) -> dict:
        """
        Generates an answer by constructing a prompt from the user's input and the scraped
        content, querying the language model, and parsing its response.
        The method updates the state with the generated answer under the 'answer' key.
        Args:
-            state (dict): The current state of the graph, expected to contain 'user_input',
+            state (dict): The current state of the graph. The input keys will be used
-                          and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
+                            to fetch the correct data from the state.
        Returns:
-            dict: The updated state with the 'answer' key containing the generated answer.
+            dict: The updated state with the output key containing the generated answer.
        Raises:
-            KeyError: If 'user_input' or 'document' is not found in the state, indicating
+            KeyError: If the input keys are not found in the state, indicating
                      that the necessary information for generating an answer is missing.
        """
--- a/scrapegraphai/nodes/generate_scraper_node.py
+++ b/scrapegraphai/nodes/generate_scraper_node.py
@ -1,6 +1,7 @@
 """
-Module for generating the answer node
+GenerateScraperNode Module
 """
 # Imports from standard library
 from typing import List
 from tqdm import tqdm
@ -16,58 +17,46 @@ from .base_node import BaseNode
 class GenerateScraperNode(BaseNode):
    """
-    A node that generates an answer using a language model (LLM) based on the user's input
+    Generates a python script for scraping a website using the specified library.
-    and the content extracted from a webpage. It constructs a prompt from the user's input
+    It takes the user's prompt and the scraped content as input and generates a python script
-    and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
+    that extracts the information requested by the user.
    an answer.
    Attributes:
-        llm: An instance of a language model client, configured for generating answers.
+        llm_model: An instance of a language model client, configured for generating answers.
-        node_name (str): The unique identifier name for the node, defaulting 
+        library (str): The python library to use for scraping the website.
-        to "GenerateScraperNode".
+        source (str): The website to scrape.
        node_type (str): The type of the node, set to "node" indicating a 
        standard operational node.
    Args:
-        llm: An instance of the language model client (e.g., ChatOpenAI) used 
+        input (str): Boolean expression defining the input keys needed from the state.
-        for generating answers.
+        output (List[str]): List of output keys to be updated in the state.
-        node_name (str, optional): The unique identifier name for the node. 
+        node_config (dict): Additional configuration for the node.
-        Defaults to "GenerateScraperNode".
+        library (str): The python library to use for scraping the website.
        website (str): The website to scrape.
        node_name (str): The unique identifier name for the node, defaulting to "GenerateAnswer".
    Methods:
        execute(state): Processes the input and document from the state to generate an answer,
                        updating the state with the generated answer under the 'answer' key.
    """
    def __init__(self, input: str, output: List[str], node_config: dict,
                 library: str, website: str, node_name: str = "GenerateAnswer"):
        """
        Initializes the GenerateScraperNode with a language model client and a node name.
        Args:
            llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
            node_name (str): name of the node
        """
        super().__init__(node_name, "node", input, output, 2, node_config)
        self.llm_model = node_config["llm"]
        self.library = library
        self.source = website
-    def execute(self, state):
+    def execute(self, state: dict) -> dict:
        """
-        Generates an answer by constructing a prompt from the user's input and the scraped
+        Generates a python script for scraping a website using the specified library.
        content, querying the language model, and parsing its response.
        The method updates the state with the generated answer under the 'answer' key.
        Args:
-            state (dict): The current state of the graph, expected to contain 'user_input',
+            state (dict): The current state of the graph. The input keys will be used
-                          and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
+                            to fetch the correct data from the state.
        Returns:
-            dict: The updated state with the 'answer' key containing the generated answer.
+            dict: The updated state with the output key containing the generated answer.
        Raises:
-            KeyError: If 'user_input' or 'document' is not found in the state, indicating
+            KeyError: If input keys are not found in the state, indicating
                      that the necessary information for generating an answer is missing.
        """
--- a/scrapegraphai/nodes/get_probable_tags_node.py
+++ b/scrapegraphai/nodes/get_probable_tags_node.py
@ -1,6 +1,7 @@
 """
-Module for proobable tags
+GetProbableTagsNode Module
 """
 from typing import List
 from langchain.output_parsers import CommaSeparatedListOutputParser
 from langchain.prompts import PromptTemplate
@ -15,47 +16,36 @@ class GetProbableTagsNode(BaseNode):
    list of probable tags.
    Attributes:
-        llm: An instance of a language model client, configured for generating tag predictions.
+        llm_model: An instance of the language model client used for tag predictions.
        node_name (str): The unique identifier name for the node,
        defaulting to "GetProbableTagsNode".
        node_type (str): The type of the node, set to "node" indicating a standard operational node.
    Args:
-        llm: An instance of the language model client (e.g., ChatOpenAI) used for tag predictions.
+        input (str): Boolean expression defining the input keys needed from the state.
-        node_name (str, optional): The unique identifier name for the node. 
+        output (List[str]): List of output keys to be updated in the state.
-        Defaults to "GetProbableTagsNode".
+        model_config (dict): Additional configuration for the language model.
-
+        node_name (str): The unique identifier name for the node, defaulting to "GetProbableTags".
    Methods:
        execute(state): Processes the user's input and the URL from the state to generate a list of 
                        probable HTML tags, updating the state with these tags under the 'tags' key.
    """
    def __init__(self, input: str, output: List[str], model_config: dict,
                 node_name: str = "GetProbableTags"):
        """
        Initializes the GetProbableTagsNode with a language model client and a node name.
        Args:
            llm (OpenAIImageToText): An instance of the OpenAIImageToText class.
            node_name (str): name of the node
        """
        super().__init__(node_name, "node", input, output, 2, model_config)
        self.llm_model = model_config["llm_model"]
-    def execute(self, state):
+    def execute(self, state: dict) -> dict:
        """
        Generates a list of probable HTML tags based on the user's input and updates the state 
        with this list. The method constructs a prompt for the language model, submits it, and 
        parses the output to identify probable tags.
        Args:
-            state (dict): The current state of the graph, expected to contain 'user_input', 'url',
+            state (dict): The current state of the graph. The input keys will be used to fetch the
-                          and optionally 'document' within 'keys'.
+                            correct data types from the state.
        Returns:
-            dict: The updated state with the 'tags' key containing a list of probable HTML tags.
+            dict: The updated state with the output key containing a list of probable HTML tags.
        Raises:
-            KeyError: If 'user_input' or 'url' is not found in the state, indicating that the
+            KeyError: If input keys are not found in the state, indicating that the
                      necessary information for generating tag predictions is missing.
        """
--- a/scrapegraphai/nodes/image_to_text_node.py
+++ b/scrapegraphai/nodes/image_to_text_node.py
@ -1,45 +1,44 @@
 """
-Module for the ImageToTextNode class.
+ImageToTextNode Module
 """
 from typing import List
 from .base_node import BaseNode
 class ImageToTextNode(BaseNode):
    """
-    A class representing a node that processes an image and returns the text description.
+    Retrieve an image from a URL and convert it to text using an ImageToText model.
    Attributes:
-        llm_model (OpenAIImageToText): An instance of the OpenAIImageToText class.
+        llm_model: An instance of the language model client used for image-to-text conversion.
        verbose (bool): A flag indicating whether to show print statements during execution.
-    Methods:
+    Args:
-        execute(state, url): Execute the node's logic and return the updated state.
+        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (dict): Additional configuration for the node.
        node_name (str): The unique identifier name for the node, defaulting to "ImageToText".
    """
    def __init__(self, input: str, output: List[str], node_config: dict,
                 node_name: str = "ImageToText"):
        """
        Initializes an instance of the ImageToTextNode class.
        Args:
            input (str): The input for the node.
            output (List[str]): The output of the node.
            node_config (dict): Configuration for the model.
            node_name (str): Name of the node.
        """
        super().__init__(node_name, "node", input, output, 1, node_config)
        self.llm_model = node_config["llm_model"]
        self.verbose = True if node_config is None else node_config.get("verbose", False)
    def execute(self, state: dict) -> dict:
        """
-        Execute the node's logic and return the updated state.
+        Generate text from an image using an image-to-text model. The method retrieves the image
        from the URL provided in the state.
        Args:
-            state (dict): The current state of the graph.
+            state (dict): The current state of the graph. The input keys will be used to fetch the
                            correct data types from the state.
        Returns:
-            dict: The updated state after executing this node.
+            dict: The updated state with the output key containing the text extracted from the image.
        """
        if self.verbose:
--- a/scrapegraphai/nodes/parse_node.py
+++ b/scrapegraphai/nodes/parse_node.py
@ -1,6 +1,7 @@
 """
-Module for parsing the HTML node
+ParseNode Module
 """
 from typing import List
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_transformers import Html2TextTransformer
@ -10,56 +11,40 @@ from .base_node import BaseNode
 class ParseNode(BaseNode):
    """
    A node responsible for parsing HTML content from a document. 
-    It uses BeautifulSoupTransformer for parsing, providing flexibility in extracting
+    The parsed content is split into chunks for further processing.
    specific parts of an HTML document.
    This node enhances the scraping workflow by allowing for targeted extraction of 
    content, thereby optimizing the processing of large HTML documents.
    Attributes:
-        node_name (str): The unique identifier name for the node, defaulting to "ParseHTMLNode".
+        verbose (bool): A flag indicating whether to show print statements during execution.
        node_type (str): The type of the node, set to "node" indicating a standard operational node.
    Args:
-        node_name (str, optional): The unique identifier name for the node. 
+        input (str): Boolean expression defining the input keys needed from the state.
-        Defaults to "ParseHTMLNode".
+        output (List[str]): List of output keys to be updated in the state.
-
+        node_config (dict): Additional configuration for the node.
-    Methods:
+        node_name (str): The unique identifier name for the node, defaulting to "Parse".
        execute(state): Parses the HTML document contained within the state using 
        the specified tags, if provided, and updates the state with the parsed content.
    """
    def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "Parse"):
        """
        Initializes the ParseHTMLNode with a node name.
        Args:
            doc_type (str): type of the input document
            chunks_size (int): size of the chunks to split the document
            node_name (str): name of the node
            node_type (str, optional): type of the node
        """
        super().__init__(node_name, "node", input, output, 1, node_config)
        self.verbose = True if node_config is None else node_config.get("verbose", False)
-    def execute(self,  state):
+    def execute(self,  state: dict) -> dict:
        """
-        Executes the node's logic to parse the HTML document based on specified tags. 
+        Executes the node's logic to parse the HTML document content and split it into chunks.
        If tags are provided in the state, the document is parsed accordingly; otherwise, 
        the document remains unchanged. The method updates the state with either the original 
        or parsed document under the 'parsed_document' key.
        Args:
-            state (dict): The current state of the graph, expected to contain 
+            state (dict): The current state of the graph. The input keys will be used to fetch the
-            'document' within 'keys', and optionally 'tags' for targeted parsing.
+                            correct data from the state.
        Returns:
-            dict: The updated state with the 'parsed_document' key containing the parsed content,
+            dict: The updated state with the output key containing the parsed content chunks.
                  if tags were provided, or the original document otherwise.
        Raises:
-            KeyError: If 'document' is not found in the state, indicating that the necessary 
+            KeyError: If the input keys are not found in the state, indicating that the
-                      information for parsing is missing.
+                        necessary information for parsing the content is missing.
        """
        if self.verbose:
--- a/scrapegraphai/nodes/rag_node.py
+++ b/scrapegraphai/nodes/rag_node.py
@ -1,5 +1,5 @@
 """
-Module for parsing the HTML node
+RAGNode Module
 """
 from typing import List
@ -18,46 +18,44 @@ from .base_node import BaseNode
 class RAGNode(BaseNode):
    """
    A node responsible for compressing the input tokens and storing the document
-    in a vector database for retrieval.
+    in a vector database for retrieval. Relevant chunks are stored in the state.
    It allows scraping of big documents without exceeding the token limit of the language model.
    Attributes:
-        node_name (str): The unique identifier name for the node, defaulting to "ParseHTMLNode".
+        llm_model: An instance of a language model client, configured for generating answers.
-        node_type (str): The type of the node, set to "node" indicating a standard operational node.
+        embedder_model: An instance of an embedding model client, configured for generating embeddings.
        verbose (bool): A flag indicating whether to show print statements during execution.
    Args:
-        node_name (str, optional): The unique identifier name for the node.
+        input (str): Boolean expression defining the input keys needed from the state.
-        Defaults to "ParseHTMLNode".
+        output (List[str]): List of output keys to be updated in the state.
-
+        node_config (dict): Additional configuration for the node.
-    Methods:
+        node_name (str): The unique identifier name for the node, defaulting to "RAG".
        execute(state): Parses the HTML document contained within the state using
        the specified tags, if provided, and updates the state with the parsed content.
    """
    def __init__(self, input: str, output: List[str], node_config: dict, node_name: str = "RAG"):
        """
        Initializes the ParseHTMLNode with a node name.
        """
        super().__init__(node_name, "node", input, output, 2, node_config)
        self.llm_model = node_config["llm"]
        self.embedder_model = node_config.get("embedder_model", None)
        self.verbose = True if node_config is None else node_config.get("verbose", False)
-    def execute(self, state):
+    def execute(self, state: dict) -> dict:
        """
-        Executes the node's logic to implement RAG (Retrieval-Augmented Generation)
+        Executes the node's logic to implement RAG (Retrieval-Augmented Generation).
        The method updates the state with relevant chunks of the document.
        Args:
-            state (dict): The state containing the 'document' key with the HTML content
+            state (dict): The current state of the graph. The input keys will be used to fetch the
                            correct data from the state.
        Returns:
-            dict: The updated state containing the 'relevant_chunks' key with the relevant chunks.
+            dict: The updated state with the output key containing the relevant chunks of the document.
        Raises:
-            KeyError: If 'document' is not found in the state, indicating that the necessary
+            KeyError: If the input keys are not found in the state, indicating that the
-                      information for parsing is missing.
+                        necessary information for compressing the content is missing.
        """
        if self.verbose:
--- a/scrapegraphai/nodes/robots_node.py
+++ b/scrapegraphai/nodes/robots_node.py
@ -1,6 +1,7 @@
 """
-Module for checking if a website is scrapepable or not
+RobotsNode Module
 """
 from typing import List
 from urllib.parse import urlparse
 from langchain_community.document_loaders import AsyncHtmlLoader
@ -12,75 +13,53 @@ from ..helpers import robots_dictionary
 class RobotsNode(BaseNode):
    """
-    A node responsible for checking if a website is scrapepable or not.
+    A node responsible for checking if a website is scrapeable or not based on the robots.txt file.
-    It uses the AsyncHtmlLoader for asynchronous
+    It uses a language model to determine if the website allows scraping of the provided path.
    document loading.
    This node acts as a starting point in many scraping workflows, preparing the state
    with the necessary HTML content for further processing by subsequent nodes in the graph.
    Attributes:
-        This node acts as a starting point in many scraping workflows, preparing the state
+        llm_model: An instance of the language model client used for checking scrapeability.
-    with the necessary HTML content for further processing by subsequent nodes in the graph.
+        force_scraping (bool): A flag indicating whether scraping should be enforced even
-
+                               if disallowed by robots.txt.
-    Attributes:
+        verbose (bool): A flag indicating whether to show print statements during execution.
        node_name (str): The unique identifier name for the node.
        node_type (str): The type of the node, defaulting to "node". This categorization
                         helps in determining the node's role and behavior within the graph.
                         The "node" type is used for standard operational nodes.
    Args:
-        node_name (str): The unique identifier name for the node. This name is used to
+        input (str): Boolean expression defining the input keys needed from the state.
-                         reference the node within the graph.
+        output (List[str]): List of output keys to be updated in the state.
-        node_type (str, optional): The type of the node, limited to "node" or
+        node_config (dict): Additional configuration for the node.
                                   "conditional_node". Defaults to "node".
        node_config (dict): Configuration parameters for the node.
        force_scraping (bool): A flag indicating whether scraping should be enforced even
-                               if disallowed by robots.txt. Defaults to True.
+                                 if disallowed by robots.txt. Defaults to True.
-        input (str): Input expression defining how to interpret the incoming data.
+        node_name (str): The unique identifier name for the node, defaulting to "Robots".
        output (List[str]): List of output keys where the results will be stored.
    Methods:
        execute(state): Fetches the HTML content for the URL specified in the state and
                        updates the state with this content under the 'document' key.
                        The 'url' key must be present in the state for the operation
                        to succeed.
    """
    def __init__(self, input: str, output: List[str],  node_config: dict, force_scraping=True,
                 node_name: str = "Robots"):
        """
        Initializes the RobotsNode with a node name, input/output expressions
         and node configuration.
        Arguments:
            input (str): Input expression defining how to interpret the incoming data.
            output (List[str]): List of output keys where the results will be stored.
            node_config (dict): Configuration parameters for the node.
            force_scraping (bool): A flag indicating whether scraping should be enforced even
                                   if disallowed by robots.txt. Defaults to True.
            node_name (str, optional): The unique identifier name for the node.
                                       Defaults to "Robots".
        """
        super().__init__(node_name, "node", input, output, 1)
        self.llm_model = node_config["llm"]
        self.force_scraping = force_scraping
        self.verbose = True if node_config is None else node_config.get("verbose", False)
-    def execute(self, state):
+    def execute(self, state: dict) -> dict:
        """
-        Executes the node's logic to fetch HTML content from a specified URL and
+        Checks if a website is scrapeable based on the robots.txt file and updates the state
-        update the state with this content.
+        with the scrapeability status. The method constructs a prompt for the language model,
        submits it, and parses the output to determine if scraping is allowed.
        Args:
-            state (dict): The current state of the graph, expected to contain a 'url' key.
+            state (dict): The current state of the graph. The input keys will be used to fetch the
+                            correct data from the state.
        Returns:
-            dict: The updated state with a new 'document' key containing the fetched HTML content.
+            dict: The updated state with the output key containing the scrapeability status.
        Raises:
-            KeyError: If the 'url' key is not found in the state, indicating that the
+            KeyError: If the input keys are not found in the state, indicating that the
-                      necessary information to perform the operation is missing.
+                        necessary information for checking scrapeability is missing.
            KeyError: If the large language model is not found in the robots_dictionary.
            ValueError: If the website is not scrapeable based on the robots.txt file and
                        scraping is not enforced.
        """
        if self.verbose:
--- a/scrapegraphai/nodes/search_internet_node.py
+++ b/scrapegraphai/nodes/search_internet_node.py
@ -1,6 +1,7 @@
 """
-Module for generating the answer node
+SearchInternetNode Module
 """
 from typing import List
 from langchain.output_parsers import CommaSeparatedListOutputParser
 from langchain.prompts import PromptTemplate
@ -10,63 +11,46 @@ from .base_node import BaseNode
 class SearchInternetNode(BaseNode):
    """
-    A node that generates an answer by querying a language model (LLM) based on the user's input
+    A node that generates a search query based on the user's input and searches the internet
-    and the content extracted from a webpage. It constructs a prompt from the user's input
+    for relevant information. The node constructs a prompt for the language model, submits it,
-    and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
+    and processes the output to generate a search query. It then uses the search query to find
-    an answer.
+    relevant information on the internet and updates the state with the generated answer.
    Attributes:
-        node_name (str): The unique identifier name for the node.
+        llm_model: An instance of the language model client used for generating search queries.
-        node_type (str): The type of the node, set to "node" indicating a standard operational node.
+        verbose (bool): A flag indicating whether to show print statements during execution.
        input (str): The user input used to construct the prompt.
        output (List[str]): The keys in the state dictionary 
                            where the generated answer will be stored.
        model_config (dict): Configuration parameters for the language model client.
    Args:
-        input (str): The user input used to construct the prompt.
+        input (str): Boolean expression defining the input keys needed from the state.
-        output (List[str]): The keys in the state dictionary where the
+        output (List[str]): List of output keys to be updated in the state.
-                             generated answer will be stored.
+        node_config (dict): Additional configuration for the node.
-        model_config (dict): Configuration parameters for the language model client.
+        node_name (str): The unique identifier name for the node, defaulting to "SearchInternet".
        node_name (str, optional): The unique identifier name for the node. 
    Methods:
        execute(state): Processes the input and document from the state to generate an answer,
                        updating the state with the generated answer under the 'answer' key.
    """
    def __init__(self, input: str, output: List[str], node_config: dict,
                 node_name: str = "SearchInternet"):
        """
        Initializes the SearchInternetNode with input, output, model configuration, and a node name.
        Args:
            input (str): The user input used to construct the prompt.
            output (List[str]): The keys in the state dictionary where the
             generated answer will be stored.
            model_config (dict): Configuration parameters for the language model client.
            node_name (str): The unique identifier name for the node.
        """
        super().__init__(node_name, "node", input, output, 1, node_config)
        self.llm_model = node_config["llm"]
        self.verbose = True if node_config is None else node_config.get("verbose", False)
-    def execute(self, state):
+    def execute(self, state: dict) -> dict:
        """
        Generates an answer by constructing a prompt from the user's input and the scraped
        content, querying the language model, and parsing its response.
-        The method updates the state with the generated answer under the 'answer' key.
+        The method updates the state with the generated answer.
        Args:
-            state (dict): The current state of the graph, expected to contain 'user_input',
+            state (dict): The current state of the graph. The input keys will be used to fetch the
-                          and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
+                            correct data types from the state.
        Returns:
-            dict: The updated state with the 'answer' key containing the generated answer.
+            dict: The updated state with the output key containing the generated answer.
        Raises:
-            KeyError: If 'user_input' or 'document' is not found in the state, indicating
+            KeyError: If the input keys are not found in the state, indicating that the
-                      that the necessary information for generating an answer is missing.
+                        necessary information for generating the answer is missing.
        """
        if self.verbose:
--- a/scrapegraphai/nodes/search_link_node.py
+++ b/scrapegraphai/nodes/search_link_node.py
@ -1,6 +1,7 @@
 """
-Module for generating the answer node
+SearchLinkNode Module
 """
 # Imports from standard library
 from typing import List
 from tqdm import tqdm
@ -18,58 +19,42 @@ from .base_node import BaseNode
 class SearchLinkNode(BaseNode):
    """
-    A node that generates an answer using a language model (LLM) based on the user's input
+    A node that looks for all the links in a web page and returns them.
-    and the content extracted from a webpage. It constructs a prompt from the user's input
+    It initially tries to extract the links using classical methods; if that fails, it uses the LLM to extract the links.
    and the scraped content, feeds it to the LLM, and parses the LLM's response to produce
    an answer.
    Attributes:
-        llm: An instance of a language model client, configured for generating answers.
+        llm_model: An instance of the language model client used for generating answers.
-        node_name (str): The unique identifier name for the node, defaulting 
+        verbose (bool): A flag indicating whether to show print statements during execution.
        to "GenerateAnswerNode".
        node_type (str): The type of the node, set to "node" indicating a 
        standard operational node.
    Args:
-        llm: An instance of the language model client (e.g., ChatOpenAI) used 
+        input (str): Boolean expression defining the input keys needed from the state.
-        for generating answers.
+        output (List[str]): List of output keys to be updated in the state.
-        node_name (str, optional): The unique identifier name for the node. 
+        node_config (dict): Additional configuration for the node.
-        Defaults to "GenerateAnswerNode".
+        node_name (str): The unique identifier name for the node, defaulting to "GenerateLinks".
    Methods:
        execute(state): Processes the input and document from the state to generate an answer,
                        updating the state with the generated answer under the 'answer' key.
    """
    def __init__(self, input: str, output: List[str], node_config: dict,
                 node_name: str = "GenerateLinks"):
        """
        Initializes the SearchLinkNode and stores the language model and verbosity flag.
        Args:
            input (str): Boolean expression defining the input keys needed from the state.
            output (List[str]): List of output keys to be updated in the state.
            node_config (dict): Configuration for the node. Must contain an "llm" entry;
             may contain a "verbose" flag, which is treated as False when absent.
            node_name (str): The unique identifier name for the node, defaulting
             to "GenerateLinks".
        Raises:
            KeyError: If node_config has no "llm" entry.
        """
        # Registers the node with BaseNode using the node type "node".
        super().__init__(node_name, "node", input, output, 1, node_config)
        self.llm_model = node_config["llm"]
        # NOTE(review): node_config has already been subscripted on the line above, so the
        # `is None` branch below can never be taken - confirm whether None should be accepted.
        self.verbose = True if node_config is None else node_config.get("verbose", False)
-    def execute(self, state):
+    def execute(self, state: dict) -> dict:
        """
-        Generates an answer by constructing a prompt from the user's input and the scraped
+        Generates a list of links by extracting them from the provided HTML content.
-        content, querying the language model, and parsing its response.
+        First, it tries to extract the links using classical methods; if that fails, it uses the LLM to extract the links.
        The method updates the state with the generated answer under the 'answer' key.
        Args:
-            state (dict): The current state of the graph, expected to contain 'user_input',
+            state (dict): The current state of the graph. The input keys will be used to fetch the
-                          and optionally 'parsed_document' or 'relevant_chunks' within 'keys'.
+                            correct data types from the state.
        Returns:
-            dict: The updated state with the 'answer' key containing the generated answer.
+            dict: The updated state with the output key containing the list of links.
        Raises:
-            KeyError: If 'user_input' or 'document' is not found in the state, indicating
+            KeyError: If the input keys are not found in the state, indicating that the
-                      that the necessary information for generating an answer is missing.
+                        necessary information for generating the answer is missing.
        """
        if self.verbose:
@ -90,7 +75,7 @@ class SearchLinkNode(BaseNode):
        except Exception as e:
            if self.verbose:
-                print("error on using classical methods. Using LLM for getting the links")
+                print("Error extracting links using classical methods. Using LLM to extract links.")
            output_parser = JsonOutputParser()
--- a/scrapegraphai/nodes/text_to_speech_node.py
+++ b/scrapegraphai/nodes/text_to_speech_node.py
@ -1,39 +1,47 @@
 """
 TextToSpeechNode Module
 """
 """
 Module for parsing the text to voice
 """
 from typing import List
 from .base_node import BaseNode
 class TextToSpeechNode(BaseNode):
    """
-    A class representing a node that processes text and returns the voice.
+    Converts text to speech using the specified text-to-speech model.
    Attributes:
-        llm (OpenAITextToSpeech): An instance of the OpenAITextToSpeech class.
+        tts_model: An instance of the text-to-speech model client.
        verbose (bool): A flag indicating whether to show print statements during execution.
-    Methods:
+    Args:
-        execute(state, text): Execute the node's logic and return the updated state.
+        input (str): Boolean expression defining the input keys needed from the state.
        output (List[str]): List of output keys to be updated in the state.
        node_config (dict): Additional configuration for the node.
        node_name (str): The unique identifier name for the node, defaulting to "TextToSpeech".
    """
    def __init__(self, input: str, output: List[str],
                 node_config: dict, node_name: str = "TextToSpeech"):
        """
        Initializes the TextToSpeechNode and stores the text-to-speech model and verbosity flag.
        Args:
            input (str): Boolean expression defining the input keys needed from the state.
            output (List[str]): List of output keys to be updated in the state.
            node_config (dict): Configuration for the node. Must contain a "tts_model" entry;
             may contain a "verbose" flag, which is treated as False when absent.
            node_name (str): The unique identifier name for the node, defaulting
             to "TextToSpeech".
        Raises:
            KeyError: If node_config has no "tts_model" entry.
        """
        # Registers the node with BaseNode using the node type "node".
        super().__init__(node_name, "node", input, output, 1, node_config)
        self.tts_model = node_config["tts_model"]
        # NOTE(review): node_config has already been subscripted on the line above, so the
        # `is None` branch below can never be taken - confirm whether None should be accepted.
        self.verbose = True if node_config is None else node_config.get("verbose", False)
-    def execute(self, state):
+    def execute(self, state: dict) -> dict:
        """
-        Execute the node's logic and return the updated state.
+        Converts text to speech using the specified text-to-speech model.
        Args:
            state (dict): The current state of the graph.
            text (str): The text to convert to speech.
-        :return: The updated state after executing this node.
+        Args:
            state (dict): The current state of the graph. The input keys will be used to fetch the
                            correct data types from the state.
        Returns:
            dict: The updated state with the output key containing the audio generated from the text.
        Raises:
            KeyError: If the input keys are not found in the state, indicating that the
                        necessary information for generating the audio is missing.
        """
        if self.verbose: