docs: update utils docstrings

This commit is contained in:
Perinim 2024-05-01 12:35:12 +02:00
parent 96975b2e36
commit cf038b33ea
9 changed files with 127 additions and 59 deletions

View File

@ -6,20 +6,27 @@ import sys
import pandas as pd
def convert_to_csv(data: dict, filename: str, position: str = None):
def convert_to_csv(data: dict, filename: str, position: str = None) -> None:
"""
Converts a dictionary to a CSV file and saves it.
Converts a dictionary to a CSV file and saves it at a specified location.
Args:
data (dict): Data to be converted to CSV.
position (str): Optional path where the file should be saved. If not provided,
the directory of the caller script will be used.
data (dict): The data to be converted into CSV format.
filename (str): The name of the output CSV file, without the '.csv' extension.
position (str, optional): The file path where the CSV should be saved. Defaults to the directory of the caller script if not provided.
Returns:
None: The function does not return anything.
Raises:
FileNotFoundError: If the specified directory does not exist.
PermissionError: If the program lacks write permission for the directory.
TypeError: If the input data is not a dictionary.
Exception: For other potential errors during DataFrame creation or CSV saving.
FileNotFoundError: If the specified directory does not exist.
PermissionError: If write permissions are lacking for the directory.
TypeError: If `data` is not a dictionary.
Exception: For other issues that may arise during the creation or saving of the CSV file.
Example:
>>> convert_to_csv({'id': [1, 2], 'value': [10, 20]}, 'output', '/path/to/save')
Saves a CSV file named 'output.csv' at '/path/to/save'.
"""
if ".csv" in filename:

View File

@ -6,23 +6,33 @@ import os
import sys
def convert_to_json(data: dict, filename: str, position: str = None):
def convert_to_json(data: dict, filename: str, position: str = None) -> None:
"""
Convert data to JSON format and save it to a file.
Converts a dictionary to a JSON file and saves it at a specified location.
Args:
data (dict): Data to save.
filename (str): Name of the file to save without .json extension.
position (str): Directory where the file should be saved. If None,
the directory of the caller script will be used.
data (dict): The data to be converted into JSON format.
filename (str): The name of the output JSON file, without the '.json' extension.
position (str, optional): The file path where the JSON file should be saved. Defaults to the directory of the caller script if not provided.
Returns:
None: The function does not return anything.
Raises:
ValueError: If filename contains '.json'.
FileNotFoundError: If the specified directory does not exist.
PermissionError: If the program does not have permission to write to the directory.
ValueError: If 'filename' contains '.json'.
FileNotFoundError: If the specified directory does not exist.
PermissionError: If write permissions are lacking for the directory.
Example:
>>> convert_to_json({'id': [1, 2], 'value': [10, 20]}, 'output', '/path/to/save')
Saves a JSON file named 'output.json' at '/path/to/save'.
Notes:
This function automatically ensures the directory exists before attempting to write the file. If the directory does not exist, it will attempt to create it.
"""
if ".json" in filename:
filename = filename.replace(".json", "") # Remove .csv extension
filename = filename.replace(".json", "") # Remove .json extension
# Get the directory of the caller script
if position is None:

View File

@ -4,12 +4,30 @@ Parse_state_key module
import re
def parse_expression(expression, state: dict):
"""
Function for parsing the expressions
Args:
state (dict): state to elaborate
def parse_expression(expression, state: dict) -> list:
"""
Parses a complex boolean expression involving state keys.
Args:
expression (str): The boolean expression to parse.
state (dict): Dictionary of state keys used to evaluate the expression.
Raises:
ValueError: If the expression is empty, contains adjacent state keys without an operator between them, uses operators incorrectly,
has unbalanced parentheses, or matches no state keys.
Returns:
list: A list of state keys that match the boolean expression, ensuring each key appears only once.
Example:
>>> parse_expression("user_input & (relevant_chunks | parsed_document | document)",
{"user_input": None, "document": None, "parsed_document": None, "relevant_chunks": None})
['user_input', 'relevant_chunks', 'parsed_document', 'document']
This function evaluates the expression to determine the logical inclusion of state keys based on provided boolean logic.
It checks for syntax errors such as unbalanced parentheses, incorrect adjacency of operators, and empty expressions.
"""
# Check for empty expression
if not expression:
raise ValueError("Empty expression.")

View File

@ -7,13 +7,17 @@ import pandas as pd
def prettify_exec_info(complete_result: list[dict]) -> pd.DataFrame:
"""
Transform the execution information of the graph into a DataFrame for better visualization.
Transforms the execution information of a graph into a DataFrame for enhanced visualization.
Args:
- complete_result (list[dict]): The complete execution information of the graph.
complete_result (list[dict]): The complete execution information of the graph.
Returns:
- pd.DataFrame: The execution information of the graph in a DataFrame.
pd.DataFrame: A DataFrame that organizes the execution information for better readability and analysis.
Example:
>>> prettify_exec_info([{'node': 'A', 'status': 'success'}, {'node': 'B', 'status': 'failure'}])
DataFrame with columns 'node' and 'status' showing execution results for each node.
"""
df_nodes = pd.DataFrame(complete_result)

View File

@ -4,26 +4,29 @@ Module for rotating proxies
from fp.fp import FreeProxy
def proxy_generator(num_ips: int):
def proxy_generator(num_ips: int) -> list:
"""
Rotates through a specified number of proxy IPs using the FreeProxy library.
Generates a specified number of proxy IP addresses using the FreeProxy library.
Args:
num_ips (int): The number of proxy IPs to rotate through.
num_ips (int): The number of proxy IPs to generate and rotate through.
Returns:
dict: A dictionary containing the rotated proxy IPs, indexed by their position in rotation.
list: A list of proxy IP addresses.
Example:
>>> proxy_generator(5)
{
0: '192.168.1.1:8080',
1: '103.10.63.135:8080',
2: '176.9.75.42:8080',
3: '37.57.216.2:8080',
4: '113.20.31.250:8080'
}
[
'192.168.1.1:8080',
'103.10.63.135:8080',
'176.9.75.42:8080',
'37.57.216.2:8080',
'113.20.31.250:8080'
]
This function fetches fresh proxies and collects them in a list, making it easy to manage multiple proxy configurations.
"""
res = []
for i in range(0, num_ips):

View File

@ -7,15 +7,20 @@ from minify_html import minify
def remover(html_content: str) -> str:
"""
This function processes HTML content, removes unnecessary tags
(including style tags), minifies the HTML, and retrieves the
title and body content.
Processes HTML content by removing unnecessary tags, minifying the HTML, and extracting the title and body content.
Parameters:
html_content (str): The HTML content to parse
Args:
html_content (str): The HTML content to be processed.
Returns:
str: The parsed title followed by the minified body content
str: A string combining the parsed title and the minified body content. If no body content is found, the returned string indicates this instead.
Example:
>>> html_content = "<html><head><title>Example</title></head><body><p>Hello World!</p></body></html>"
>>> remover(html_content)
'Title: Example, Body: <body><p>Hello World!</p></body>'
This function is particularly useful for preparing HTML content for environments where bandwidth usage needs to be minimized.
"""
soup = BeautifulSoup(html_content, 'html.parser')

View File

@ -8,16 +8,25 @@ from googlesearch import search
def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
"""
Function that given a query it finds it on the intenet
"""
Searches the web for a given query using the specified search engine.
Args:
query (str): query to search on internet
search_engine (str, optional): type of browser, it could be DuckDuckGo or Google,
default: Google
max_results (int, optional): maximum number of results
query (str): The search query to find on the internet.
search_engine (str, optional): The search engine to use, either 'Google' or 'DuckDuckGo'. Defaults to 'Google'.
max_results (int, optional): The maximum number of search results to return. Defaults to 10.
Returns:
List[str]: List of strings of web link
List[str]: A list of URLs as strings that are the search results.
Raises:
ValueError: If the search engine specified is neither 'Google' nor 'DuckDuckGo'.
Example:
>>> search_on_web("example query", search_engine="Google", max_results=5)
['http://example.com', 'http://example.org', ...]
This function allows switching between Google and DuckDuckGo to perform internet searches, returning a list of result URLs.
"""
if search_engine == "Google":

View File

@ -7,12 +7,18 @@ from typing import Union
def save_audio_from_bytes(byte_response: bytes, output_path: Union[str, Path]) -> None:
"""
Saves the byte response as an audio file.
Saves the byte response as an audio file to the specified path.
Args:
byte_response (bytes): The byte response containing the generated speech.
output_path (str or Path): The file path where the generated speech should be saved.
byte_response (bytes): The byte array containing audio data.
output_path (Union[str, Path]): The destination file path where the audio file will be saved.
Example:
>>> save_audio_from_bytes(b'audio data', 'path/to/audio.mp3')
This function writes the byte array containing audio data to a file, saving it as an audio file.
"""
if not isinstance(output_path, Path):
output_path = Path(output_path)

View File

@ -8,15 +8,21 @@ from ..helpers.models_tokens import models_tokens
def truncate_text_tokens(text: str, model: str, encoding_name: str) -> List[str]:
"""
It creates a list of strings to create max dimension tokenizable elements
Truncates text into chunks that are small enough to be processed by the specified LLM.
Args:
text (str): The input text to be truncated into tokenizable elements.
model (str): The name of the language model to be used.
encoding_name (str): The name of the encoding to be used (default: EMBEDDING_ENCODING).
text (str): The input text to be truncated.
model (str): The name of the LLM, used to determine the maximum token limit.
encoding_name (str): The encoding strategy used to encode the text before truncation.
Returns:
List[str]: A list of tokenizable elements created from the input text.
List[str]: A list of text chunks, each within the token limit of the specified model.
Example:
>>> truncate_text_tokens("This is a sample text for truncation.", "GPT-3", "cl100k_base")
["This is a sample text", "for truncation."]
This function ensures that each chunk of text can be tokenized by the specified model without exceeding the model's token limit.
"""
encoding = tiktoken.get_encoding(encoding_name)