docs: graphs and helpers docstrings

This commit is contained in:
Marco Perini 2024-05-02 00:23:38 +02:00
parent 18c20eb03d
commit 0631985e61
14 changed files with 304 additions and 80 deletions

View File

@ -1,5 +1,5 @@
"""
Module for making the graph building
GraphBuilder Module
"""
from langchain_core.prompts import ChatPromptTemplate

View File

@ -1,6 +1,7 @@
"""
__init__.py file for graphs folder
"""
from .base_graph import BaseGraph
from .smart_scraper_graph import SmartScraperGraph
from .speech_graph import SpeechGraph

View File

@ -1,6 +1,7 @@
"""
Module having abstract class for creating all the graphs
AbstractGraph Module
"""
from abc import ABC, abstractmethod
from typing import Optional
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq
@ -9,13 +10,34 @@ from ..helpers import models_tokens
class AbstractGraph(ABC):
"""
Abstract class representing a generic graph-based tool.
Scaffolding class for creating a graph representation and executing it.
Attributes:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
headless (bool): A flag indicating whether to run the graph in headless mode.
Args:
prompt (str): The prompt for the graph.
config (dict): Configuration parameters for the graph.
source (str, optional): The source of the graph.
Example:
>>> class MyGraph(AbstractGraph):
... def _create_graph(self):
... # Implementation of graph creation here
... return graph
...
>>> my_graph = MyGraph("Example Graph", {"llm": {"model": "gpt-3.5-turbo"}}, "example_source")
>>> result = my_graph.run()
"""
def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
"""
Initializes the AbstractGraph with a prompt, file source, and configuration.
"""
self.prompt = prompt
self.source = source
self.config = config
@ -32,10 +54,20 @@ class AbstractGraph(ABC):
self.final_state = None
self.execution_info = None
def _create_llm(self, llm_config: dict):
def _create_llm(self, llm_config: dict) -> object:
"""
Creates an instance of the language model (OpenAI or Gemini) based on configuration.
Create a large language model instance based on the configuration provided.
Args:
llm_config (dict): Configuration parameters for the language model.
Returns:
object: An instance of the language model client.
Raises:
KeyError: If the model is not supported.
"""
llm_defaults = {
"temperature": 0,
"streaming": False
@ -104,8 +136,15 @@ class AbstractGraph(ABC):
def get_state(self, key=None) -> dict:
"""
Obtain the current state
Get the final state of the graph.
Args:
key (str, optional): The key of the final state to retrieve.
Returns:
dict: The final state of the graph, or only the value stored under `key` when a key is given.
"""
if key is not None:
return self.final_state[key]
return self.final_state
@ -113,7 +152,11 @@ class AbstractGraph(ABC):
def get_execution_info(self):
"""
Returns the execution information of the graph.
Returns:
dict: The execution information of the graph.
"""
return self.execution_info
@abstractmethod

View File

@ -1,6 +1,7 @@
"""
Module for creating the base graphs
"""
BaseGraph Module
"""
import time
import warnings
from langchain_community.callbacks import get_openai_callback
@ -16,21 +17,33 @@ class BaseGraph:
key-value pair corresponds to the from-node and to-node relationship.
entry_point (str): The name of the entry point node from which the graph execution begins.
Methods:
execute(initial_state): Executes the graph's nodes starting from the entry point and
traverses the graph based on the provided initial state.
Args:
nodes (iterable): An iterable of node instances that will be part of the graph.
edges (iterable): An iterable of tuples where each tuple represents a directed edge
in the graph, defined by a pair of nodes (from_node, to_node).
entry_point (BaseNode): The node instance that represents the entry point of the graph.
Raises:
Warning: If the entry point node is not the first node in the list.
Example:
>>> BaseGraph(
... nodes=[
... fetch_node,
... parse_node,
... rag_node,
... generate_answer_node,
... ],
... edges=[
... (fetch_node, parse_node),
... (parse_node, rag_node),
... (rag_node, generate_answer_node)
... ],
... entry_point=fetch_node
... )
"""
def __init__(self, nodes: list, edges: list, entry_point: str):
"""
Initializes the graph with nodes, edges, and the entry point.
"""
self.nodes = nodes
self.edges = self._create_edges({e for e in edges})
@ -51,6 +64,7 @@ class BaseGraph:
Returns:
dict: A dictionary of edges with the from-node as keys and to-node as values.
"""
edge_dict = {}
for from_node, to_node in edges:
edge_dict[from_node.node_name] = to_node.node_name
@ -66,8 +80,10 @@ class BaseGraph:
initial_state (dict): The initial state to pass to the entry point node.
Returns:
dict: The state after execution has completed, which may have been altered by the nodes.
Tuple[dict, list]: A tuple containing the final state of the execution and a list
of execution information for each node.
"""
current_node_name = self.nodes[0]
state = initial_state

View File

@ -1,6 +1,7 @@
"""
Module for creating the smart scraper
JSONScraperGraph Module
"""
from .base_graph import BaseGraph
from ..nodes import (
FetchNode,
@ -13,22 +14,44 @@ from .abstract_graph import AbstractGraph
class JSONScraperGraph(AbstractGraph):
"""
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
information from web pages using a natural language model to interpret and answer prompts.
JSONScraperGraph defines a scraping pipeline for JSON files.
Attributes:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
headless (bool): A flag indicating whether to run the graph in headless mode.
Args:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
Example:
>>> json_scraper = JSONScraperGraph(
... "List me all the attractions in Chioggia.",
... "data/chioggia.json",
... {"llm": {"model": "gpt-3.5-turbo"}}
... )
>>> result = json_scraper.run()
"""
def __init__(self, prompt: str, source: str, config: dict):
"""
Initializes the JSONScraperGraph with a prompt, source, and configuration.
"""
super().__init__(prompt, config, source)
self.input_key = "json" if source.endswith("json") else "json_dir"
def _create_graph(self):
def _create_graph(self) -> BaseGraph:
"""
Creates the graph of nodes representing the workflow for web scraping.
Returns:
BaseGraph: A graph instance representing the web scraping workflow.
"""
fetch_node = FetchNode(
input="json_dir",
output=["doc"],
@ -81,7 +104,11 @@ class JSONScraperGraph(AbstractGraph):
def run(self) -> str:
"""
Executes the web scraping process and returns the answer to the prompt.
Returns:
str: The answer to the prompt.
"""
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
self.final_state, self.execution_info = self.graph.execute(inputs)

View File

@ -1,6 +1,7 @@
"""
Module for creating the smart scraper
ScriptCreatorGraph Module
"""
from .base_graph import BaseGraph
from ..nodes import (
FetchNode,
@ -13,24 +14,47 @@ from .abstract_graph import AbstractGraph
class ScriptCreatorGraph(AbstractGraph):
"""
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
information from web pages using a natural language model to interpret and answer prompts.
ScriptCreatorGraph defines a scraping pipeline for generating web scraping scripts.
Attributes:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
headless (bool): A flag indicating whether to run the graph in headless mode.
model_token (int): The token limit for the language model.
library (str): The library used for web scraping.
Args:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
Example:
>>> script_creator = ScriptCreatorGraph(
... "List me all the attractions in Chioggia.",
... "https://en.wikipedia.org/wiki/Chioggia",
... {"llm": {"model": "gpt-3.5-turbo"}}
... )
>>> result = script_creator.run()
"""
def __init__(self, prompt: str, source: str, config: dict):
"""
Initializes the ScriptCreatorGraph with a prompt, source, and configuration.
"""
self.library = config['library']
super().__init__(prompt, config, source)
self.input_key = "url" if source.startswith("http") else "local_dir"
self.library = config['library']
def _create_graph(self):
def _create_graph(self) -> BaseGraph:
"""
Creates the graph of nodes representing the workflow for web scraping.
Returns:
BaseGraph: A graph instance representing the web scraping workflow.
"""
fetch_node = FetchNode(
input="url | local_dir",
output=["doc"],
@ -76,7 +100,11 @@ class ScriptCreatorGraph(AbstractGraph):
def run(self) -> str:
"""
Executes the web scraping process and returns the answer to the prompt.
Returns:
str: The answer to the prompt.
"""
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
self.final_state, self.execution_info = self.graph.execute(inputs)

View File

@ -1,6 +1,7 @@
"""
Module for searching the internet
SearchGraph Module
"""
from .base_graph import BaseGraph
from ..nodes import (
SearchInternetNode,
@ -14,13 +15,37 @@ from .abstract_graph import AbstractGraph
class SearchGraph(AbstractGraph):
"""
Module for searching info on the internet
SearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.
It only requires a user prompt to search the internet and generate an answer.
Attributes:
prompt (str): The user prompt to search the internet.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
headless (bool): A flag to run the browser in headless mode.
verbose (bool): A flag to display the execution information.
model_token (int): The token limit for the language model.
Args:
prompt (str): The user prompt to search the internet.
config (dict): Configuration parameters for the graph.
Example:
>>> search_graph = SearchGraph(
... "What is Chioggia famous for?",
... {"llm": {"model": "gpt-3.5-turbo"}}
... )
>>> result = search_graph.run()
"""
def _create_graph(self):
def _create_graph(self) -> BaseGraph:
"""
Creates the graph of nodes representing the workflow for web scraping and searching.
Returns:
BaseGraph: A graph instance representing the web scraping and searching workflow.
"""
search_internet_node = SearchInternetNode(
input="user_prompt",
output=["url"],
@ -83,7 +108,11 @@ class SearchGraph(AbstractGraph):
def run(self) -> str:
"""
Executes the web scraping and searching process.
Returns:
str: The answer to the prompt.
"""
inputs = {"user_prompt": self.prompt}
self.final_state, self.execution_info = self.graph.execute(inputs)

View File

@ -1,6 +1,7 @@
"""
Module for creating the smart scraper
SmartScraperGraph Module
"""
from .base_graph import BaseGraph
from ..nodes import (
FetchNode,
@ -13,22 +14,44 @@ from .abstract_graph import AbstractGraph
class SmartScraperGraph(AbstractGraph):
"""
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
information from web pages using a natural language model to interpret and answer prompts.
SmartScraper is a scraping pipeline that automates the process of extracting information from web pages
using a natural language model to interpret and answer prompts.
Attributes:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
headless (bool): A flag indicating whether to run the graph in headless mode.
Args:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
Example:
>>> smart_scraper = SmartScraperGraph(
... "List me all the attractions in Chioggia.",
... "https://en.wikipedia.org/wiki/Chioggia",
... {"llm": {"model": "gpt-3.5-turbo"}}
... )
>>> result = smart_scraper.run()
"""
def __init__(self, prompt: str, source: str, config: dict):
"""
Initializes the SmartScraperGraph with a prompt, source, and configuration.
"""
super().__init__(prompt, config, source)
self.input_key = "url" if source.startswith("http") else "local_dir"
def _create_graph(self):
def _create_graph(self) -> BaseGraph:
"""
Creates the graph of nodes representing the workflow for web scraping.
Returns:
BaseGraph: A graph instance representing the web scraping workflow.
"""
fetch_node = FetchNode(
input="url | local_dir",
@ -81,8 +104,12 @@ class SmartScraperGraph(AbstractGraph):
def run(self) -> str:
"""
Executes the web scraping process and returns the answer to the prompt.
Executes the scraping process and returns the answer to the prompt.
Returns:
str: The answer to the prompt.
"""
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
self.final_state, self.execution_info = self.graph.execute(inputs)

View File

@ -1,6 +1,7 @@
"""
Module for converting text to speech
SpeechGraph Module
"""
from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
from ..models import OpenAITextToSpeech
from .base_graph import BaseGraph
@ -16,22 +17,43 @@ from .abstract_graph import AbstractGraph
class SpeechGraph(AbstractGraph):
"""
SpeechSummaryGraph is a tool that automates the process of extracting and summarizing
information from web pages, then converting that summary into spoken word via an MP3 file.
SpeechGraph is a scraping pipeline that scrapes the web, provides an answer to a given prompt, and generates an audio file.
Attributes:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
headless (bool): A flag indicating whether to run the graph in headless mode.
model_token (int): The token limit for the language model.
Args:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
Example:
>>> speech_graph = SpeechGraph(
... "List me all the attractions in Chioggia and generate an audio summary.",
... "https://en.wikipedia.org/wiki/Chioggia",
... {"llm": {"model": "gpt-3.5-turbo"}}
... )
>>> result = speech_graph.run()
"""
def __init__(self, prompt: str, source: str, config: dict):
"""
Initializes the SpeechGraph with a prompt, source, and configuration.
"""
super().__init__(prompt, config, source)
self.input_key = "url" if source.startswith("http") else "local_dir"
def _create_graph(self):
def _create_graph(self) -> BaseGraph:
"""
Creates the graph of nodes representing the workflow for web scraping and summarization.
Creates the graph of nodes representing the workflow for web scraping and audio generation.
Returns:
BaseGraph: A graph instance representing the web scraping and audio generation workflow.
"""
fetch_node = FetchNode(
input="url | local_dir",
output=["doc"],
@ -93,8 +115,12 @@ class SpeechGraph(AbstractGraph):
def run(self) -> str:
"""
Executes the web scraping, summarization, and text-to-speech process.
Executes the scraping process and returns the answer to the prompt.
Returns:
str: The answer to the prompt.
"""
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
self.final_state, self.execution_info = self.graph.execute(inputs)
@ -105,4 +131,4 @@ class SpeechGraph(AbstractGraph):
"output_path", "output.mp3"))
print(f"Audio saved to {self.config.get('output_path', 'output.mp3')}")
return self.final_state
return self.final_state.get("answer", "No answer found.")

View File

@ -1,6 +1,7 @@
"""
Module for creating the smart scraper
XMLScraperGraph Module
"""
from .base_graph import BaseGraph
from ..nodes import (
FetchNode,
@ -13,22 +14,46 @@ from .abstract_graph import AbstractGraph
class XMLScraperGraph(AbstractGraph):
"""
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
information from web pages using a natural language model to interpret and answer prompts.
XMLScraperGraph is a scraping pipeline that extracts information from XML files using a natural
language model to interpret and answer prompts.
Attributes:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
headless (bool): A flag indicating whether to run the graph in headless mode.
model_token (int): The token limit for the language model.
Args:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
Example:
>>> xml_scraper = XMLScraperGraph(
... "List me all the attractions in Chioggia.",
... "data/chioggia.xml",
... {"llm": {"model": "gpt-3.5-turbo"}}
... )
>>> result = xml_scraper.run()
"""
def __init__(self, prompt: str, source: str, config: dict):
"""
Initializes the XMLScraperGraph with a prompt, source, and configuration.
"""
super().__init__(prompt, config, source)
self.input_key = "xml" if source.endswith("xml") else "xml_dir"
def _create_graph(self):
def _create_graph(self) -> BaseGraph:
"""
Creates the graph of nodes representing the workflow for web scraping.
Returns:
BaseGraph: A graph instance representing the web scraping workflow.
"""
fetch_node = FetchNode(
input="xml_dir",
output=["doc"],
@ -81,7 +106,11 @@ class XMLScraperGraph(AbstractGraph):
def run(self) -> str:
"""
Executes the web scraping process and returns the answer to the prompt.
Returns:
str: The answer to the prompt.
"""
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
self.final_state, self.execution_info = self.graph.execute(inputs)

View File

@ -1,7 +1,7 @@
"""
__init__.py for the helpers folder
"""
from .nodes_metadata import nodes_metadata
from .schemas import graph_schema
from .models_tokens import models_tokens

View File

@ -1,6 +1,7 @@
"""
Models token
"""
models_tokens = {
"openai": {
"gpt-3.5-turbo-0125": 16385,

View File

@ -1,7 +1,7 @@
"""
Module for mapping the models in ai agents
"""
robots_dictionary = {
"gpt-3.5-turbo": ["GPTBot", "ChatGPT-user"],
"gpt-4-turbo": ["GPTBot", "ChatGPT-user"],

View File

@ -17,7 +17,18 @@ class BaseNode(ABC):
output (List[str]): List of output keys to be updated in the state.
min_input_len (int): Minimum required number of input keys.
node_config (Optional[dict]): Additional configuration for the node.
Args:
node_name (str): Name for identifying the node.
node_type (str): Type of the node; must be 'node' or 'conditional_node'.
input (str): Expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
min_input_len (int, optional): Minimum required number of input keys; defaults to 1.
node_config (Optional[dict], optional): Additional configuration for the node; defaults to None.
Raises:
ValueError: If `node_type` is not one of the allowed types.
Example:
>>> class MyNode(BaseNode):
... def execute(self, state):
@ -31,20 +42,6 @@ class BaseNode(ABC):
def __init__(self, node_name: str, node_type: str, input: str, output: List[str],
min_input_len: int = 1, node_config: Optional[dict] = None):
"""
Initialize the instance with the node's name, type, input/output specifications, and configuration details.
Args:
node_name (str): Name for identifying the node.
node_type (str): Type of the node; must be 'node' or 'conditional_node'.
input (str): Expression defining the input keys needed from the state.
output (List[str]): List of output keys to be updated in the state.
min_input_len (int, optional): Minimum required number of input keys; defaults to 1.
node_config (Optional[dict], optional): Additional configuration for the node; defaults to None.
Raises:
ValueError: If `node_type` is not one of the allowed types.
"""
self.node_name = node_name
self.input = input