mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
docs: graphs and helpers docstrings
This commit is contained in:
parent
18c20eb03d
commit
0631985e61
@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Module for making the graph building
|
GraphBuilder Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from langchain_core.prompts import ChatPromptTemplate
|
from langchain_core.prompts import ChatPromptTemplate
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
__init__.py file for graphs folder
|
__init__.py file for graphs folder
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
from .smart_scraper_graph import SmartScraperGraph
|
from .smart_scraper_graph import SmartScraperGraph
|
||||||
from .speech_graph import SpeechGraph
|
from .speech_graph import SpeechGraph
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module having abstract class for creating all the graphs
|
AbstractGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq
|
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq
|
||||||
@ -9,13 +10,34 @@ from ..helpers import models_tokens
|
|||||||
|
|
||||||
class AbstractGraph(ABC):
|
class AbstractGraph(ABC):
|
||||||
"""
|
"""
|
||||||
Abstract class representing a generic graph-based tool.
|
Scaffolding class for creating a graph representation and executing it.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
|
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
||||||
|
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||||
|
headless (bool): A flag indicating whether to run the graph in headless mode.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
source (str, optional): The source of the graph.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> class MyGraph(AbstractGraph):
|
||||||
|
... def _create_graph(self):
|
||||||
|
... # Implementation of graph creation here
|
||||||
|
... return graph
|
||||||
|
...
|
||||||
|
>>> my_graph = MyGraph("Example Graph", {"llm": {"model": "gpt-3.5-turbo"}}, "example_source")
|
||||||
|
>>> result = my_graph.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
|
def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
|
||||||
"""
|
|
||||||
Initializes the AbstractGraph with a prompt, file source, and configuration.
|
|
||||||
"""
|
|
||||||
self.prompt = prompt
|
self.prompt = prompt
|
||||||
self.source = source
|
self.source = source
|
||||||
self.config = config
|
self.config = config
|
||||||
@ -32,10 +54,20 @@ class AbstractGraph(ABC):
|
|||||||
self.final_state = None
|
self.final_state = None
|
||||||
self.execution_info = None
|
self.execution_info = None
|
||||||
|
|
||||||
def _create_llm(self, llm_config: dict):
|
def _create_llm(self, llm_config: dict) -> object:
|
||||||
"""
|
"""
|
||||||
Creates an instance of the language model (OpenAI or Gemini) based on configuration.
|
Create a large language model instance based on the configuration provided.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
llm_config (dict): Configuration parameters for the language model.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
object: An instance of the language model client.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
KeyError: If the model is not supported.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
llm_defaults = {
|
llm_defaults = {
|
||||||
"temperature": 0,
|
"temperature": 0,
|
||||||
"streaming": False
|
"streaming": False
|
||||||
@ -104,8 +136,15 @@ class AbstractGraph(ABC):
|
|||||||
|
|
||||||
def get_state(self, key=None) -> dict:
|
def get_state(self, key=None) -> dict:
|
||||||
"""""
|
"""""
|
||||||
Obtain the current state
|
Get the final state of the graph.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
key (str, optional): The key of the final state to retrieve.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: The final state of the graph.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if key is not None:
|
if key is not None:
|
||||||
return self.final_state[key]
|
return self.final_state[key]
|
||||||
return self.final_state
|
return self.final_state
|
||||||
@ -113,7 +152,11 @@ class AbstractGraph(ABC):
|
|||||||
def get_execution_info(self):
|
def get_execution_info(self):
|
||||||
"""
|
"""
|
||||||
Returns the execution information of the graph.
|
Returns the execution information of the graph.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict: The execution information of the graph.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return self.execution_info
|
return self.execution_info
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module for creating the base graphs
|
BaseGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import time
|
import time
|
||||||
import warnings
|
import warnings
|
||||||
from langchain_community.callbacks import get_openai_callback
|
from langchain_community.callbacks import get_openai_callback
|
||||||
@ -16,21 +17,33 @@ class BaseGraph:
|
|||||||
key-value pair corresponds to the from-node and to-node relationship.
|
key-value pair corresponds to the from-node and to-node relationship.
|
||||||
entry_point (str): The name of the entry point node from which the graph execution begins.
|
entry_point (str): The name of the entry point node from which the graph execution begins.
|
||||||
|
|
||||||
Methods:
|
|
||||||
execute(initial_state): Executes the graph's nodes starting from the entry point and
|
|
||||||
traverses the graph based on the provided initial state.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
nodes (iterable): An iterable of node instances that will be part of the graph.
|
nodes (iterable): An iterable of node instances that will be part of the graph.
|
||||||
edges (iterable): An iterable of tuples where each tuple represents a directed edge
|
edges (iterable): An iterable of tuples where each tuple represents a directed edge
|
||||||
in the graph, defined by a pair of nodes (from_node, to_node).
|
in the graph, defined by a pair of nodes (from_node, to_node).
|
||||||
entry_point (BaseNode): The node instance that represents the entry point of the graph.
|
entry_point (BaseNode): The node instance that represents the entry point of the graph.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Warning: If the entry point node is not the first node in the list.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> BaseGraph(
|
||||||
|
... nodes=[
|
||||||
|
... fetch_node,
|
||||||
|
... parse_node,
|
||||||
|
... rag_node,
|
||||||
|
... generate_answer_node,
|
||||||
|
... ],
|
||||||
|
... edges=[
|
||||||
|
... (fetch_node, parse_node),
|
||||||
|
... (parse_node, rag_node),
|
||||||
|
... (rag_node, generate_answer_node)
|
||||||
|
... ],
|
||||||
|
... entry_point=fetch_node
|
||||||
|
... )
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, nodes: list, edges: list, entry_point: str):
|
def __init__(self, nodes: list, edges: list, entry_point: str):
|
||||||
"""
|
|
||||||
Initializes the graph with nodes, edges, and the entry point.
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.nodes = nodes
|
self.nodes = nodes
|
||||||
self.edges = self._create_edges({e for e in edges})
|
self.edges = self._create_edges({e for e in edges})
|
||||||
@ -51,6 +64,7 @@ class BaseGraph:
|
|||||||
Returns:
|
Returns:
|
||||||
dict: A dictionary of edges with the from-node as keys and to-node as values.
|
dict: A dictionary of edges with the from-node as keys and to-node as values.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
edge_dict = {}
|
edge_dict = {}
|
||||||
for from_node, to_node in edges:
|
for from_node, to_node in edges:
|
||||||
edge_dict[from_node.node_name] = to_node.node_name
|
edge_dict[from_node.node_name] = to_node.node_name
|
||||||
@ -66,8 +80,10 @@ class BaseGraph:
|
|||||||
initial_state (dict): The initial state to pass to the entry point node.
|
initial_state (dict): The initial state to pass to the entry point node.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: The state after execution has completed, which may have been altered by the nodes.
|
Tuple[dict, list]: A tuple containing the final state of the execution and a list
|
||||||
|
of execution information for each node.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
current_node_name = self.nodes[0]
|
current_node_name = self.nodes[0]
|
||||||
state = initial_state
|
state = initial_state
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module for creating the smart scraper
|
JSONScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
@ -13,22 +14,44 @@ from .abstract_graph import AbstractGraph
|
|||||||
|
|
||||||
class JSONScraperGraph(AbstractGraph):
|
class JSONScraperGraph(AbstractGraph):
|
||||||
"""
|
"""
|
||||||
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
|
JSONScraperGraph defines a scraping pipeline for JSON files.
|
||||||
information from web pages using a natural language model to interpret and answer prompts.
|
|
||||||
|
Attributes:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
|
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
||||||
|
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||||
|
headless (bool): A flag indicating whether to run the graph in headless mode.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> json_scraper = JSONScraperGraph(
|
||||||
|
... "List me all the attractions in Chioggia.",
|
||||||
|
... "data/chioggia.json",
|
||||||
|
... {"llm": {"model": "gpt-3.5-turbo"}}
|
||||||
|
... )
|
||||||
|
>>> result = json_scraper.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict):
|
||||||
"""
|
|
||||||
Initializes the JsonScraperGraph with a prompt, source, and configuration.
|
|
||||||
"""
|
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source)
|
||||||
|
|
||||||
self.input_key = "json" if source.endswith("json") else "json_dir"
|
self.input_key = "json" if source.endswith("json") else "json_dir"
|
||||||
|
|
||||||
def _create_graph(self):
|
def _create_graph(self) -> BaseGraph:
|
||||||
"""
|
"""
|
||||||
Creates the graph of nodes representing the workflow for web scraping.
|
Creates the graph of nodes representing the workflow for web scraping.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
BaseGraph: A graph instance representing the web scraping workflow.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
fetch_node = FetchNode(
|
fetch_node = FetchNode(
|
||||||
input="json_dir",
|
input="json_dir",
|
||||||
output=["doc"],
|
output=["doc"],
|
||||||
@ -81,7 +104,11 @@ class JSONScraperGraph(AbstractGraph):
|
|||||||
def run(self) -> str:
|
def run(self) -> str:
|
||||||
"""
|
"""
|
||||||
Executes the web scraping process and returns the answer to the prompt.
|
Executes the web scraping process and returns the answer to the prompt.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The answer to the prompt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
||||||
self.final_state, self.execution_info = self.graph.execute(inputs)
|
self.final_state, self.execution_info = self.graph.execute(inputs)
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module for creating the smart scraper
|
ScriptCreatorGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
@ -13,24 +14,47 @@ from .abstract_graph import AbstractGraph
|
|||||||
|
|
||||||
class ScriptCreatorGraph(AbstractGraph):
|
class ScriptCreatorGraph(AbstractGraph):
|
||||||
"""
|
"""
|
||||||
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
|
ScriptCreatorGraph defines a scraping pipeline for generating web scraping scripts.
|
||||||
information from web pages using a natural language model to interpret and answer prompts.
|
|
||||||
|
Attributes:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
|
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
||||||
|
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||||
|
headless (bool): A flag indicating whether to run the graph in headless mode.
|
||||||
|
model_token (int): The token limit for the language model.
|
||||||
|
library (str): The library used for web scraping.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> script_creator = ScriptCreatorGraph(
|
||||||
|
... "List me all the attractions in Chioggia.",
|
||||||
|
... "https://en.wikipedia.org/wiki/Chioggia",
|
||||||
|
... {"llm": {"model": "gpt-3.5-turbo"}}
|
||||||
|
... )
|
||||||
|
>>> result = script_creator.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict):
|
||||||
"""
|
|
||||||
Initializes the ScriptCreatorGraph with a prompt, source, and configuration.
|
|
||||||
"""
|
|
||||||
self.library = config['library']
|
|
||||||
|
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source)
|
||||||
|
|
||||||
self.input_key = "url" if source.startswith("http") else "local_dir"
|
self.input_key = "url" if source.startswith("http") else "local_dir"
|
||||||
|
self.library = config['library']
|
||||||
|
|
||||||
def _create_graph(self):
|
def _create_graph(self) -> BaseGraph:
|
||||||
"""
|
"""
|
||||||
Creates the graph of nodes representing the workflow for web scraping.
|
Creates the graph of nodes representing the workflow for web scraping.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
BaseGraph: A graph instance representing the web scraping workflow.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
fetch_node = FetchNode(
|
fetch_node = FetchNode(
|
||||||
input="url | local_dir",
|
input="url | local_dir",
|
||||||
output=["doc"],
|
output=["doc"],
|
||||||
@ -76,7 +100,11 @@ class ScriptCreatorGraph(AbstractGraph):
|
|||||||
def run(self) -> str:
|
def run(self) -> str:
|
||||||
"""
|
"""
|
||||||
Executes the web scraping process and returns the answer to the prompt.
|
Executes the web scraping process and returns the answer to the prompt.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The answer to the prompt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
||||||
self.final_state, self.execution_info = self.graph.execute(inputs)
|
self.final_state, self.execution_info = self.graph.execute(inputs)
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module for making the search on the intenet
|
SearchGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
SearchInternetNode,
|
SearchInternetNode,
|
||||||
@ -14,13 +15,37 @@ from .abstract_graph import AbstractGraph
|
|||||||
|
|
||||||
class SearchGraph(AbstractGraph):
|
class SearchGraph(AbstractGraph):
|
||||||
"""
|
"""
|
||||||
Module for searching info on the internet
|
SearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.
|
||||||
|
It only requires a user prompt to search the internet and generate an answer.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
prompt (str): The user prompt to search the internet.
|
||||||
|
llm_model (dict): The configuration for the language model.
|
||||||
|
embedder_model (dict): The configuration for the embedder model.
|
||||||
|
headless (bool): A flag to run the browser in headless mode.
|
||||||
|
verbose (bool): A flag to display the execution information.
|
||||||
|
model_token (int): The token limit for the language model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt (str): The user prompt to search the internet.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> search_graph = SearchGraph(
|
||||||
|
... "What is Chioggia famous for?",
|
||||||
|
... {"llm": {"model": "gpt-3.5-turbo"}}
|
||||||
|
... )
|
||||||
|
>>> result = search_graph.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _create_graph(self):
|
def _create_graph(self) -> BaseGraph:
|
||||||
"""
|
"""
|
||||||
Creates the graph of nodes representing the workflow for web scraping and searching.
|
Creates the graph of nodes representing the workflow for web scraping and searching.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
BaseGraph: A graph instance representing the web scraping and searching workflow.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
search_internet_node = SearchInternetNode(
|
search_internet_node = SearchInternetNode(
|
||||||
input="user_prompt",
|
input="user_prompt",
|
||||||
output=["url"],
|
output=["url"],
|
||||||
@ -83,7 +108,11 @@ class SearchGraph(AbstractGraph):
|
|||||||
def run(self) -> str:
|
def run(self) -> str:
|
||||||
"""
|
"""
|
||||||
Executes the web scraping and searching process.
|
Executes the web scraping and searching process.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The answer to the prompt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
inputs = {"user_prompt": self.prompt}
|
inputs = {"user_prompt": self.prompt}
|
||||||
self.final_state, self.execution_info = self.graph.execute(inputs)
|
self.final_state, self.execution_info = self.graph.execute(inputs)
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module for creating the smart scraper
|
SmartScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
@ -13,22 +14,44 @@ from .abstract_graph import AbstractGraph
|
|||||||
|
|
||||||
class SmartScraperGraph(AbstractGraph):
|
class SmartScraperGraph(AbstractGraph):
|
||||||
"""
|
"""
|
||||||
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
|
SmartScraper is a scraping pipeline that automates the process of extracting information from web pages
|
||||||
information from web pages using a natural language model to interpret and answer prompts.
|
using a natural language model to interpret and answer prompts.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
|
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
||||||
|
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||||
|
headless (bool): A flag indicating whether to run the graph in headless mode.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> smart_scraper = SmartScraperGraph(
|
||||||
|
... "List me all the attractions in Chioggia.",
|
||||||
|
... "https://en.wikipedia.org/wiki/Chioggia",
|
||||||
|
... {"llm": {"model": "gpt-3.5-turbo"}}
|
||||||
|
... )
|
||||||
|
>>> result = smart_scraper.run()
|
||||||
|
)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict):
|
||||||
"""
|
|
||||||
Initializes the SmartScraperGraph with a prompt, source, and configuration.
|
|
||||||
"""
|
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source)
|
||||||
|
|
||||||
self.input_key = "url" if source.startswith("http") else "local_dir"
|
self.input_key = "url" if source.startswith("http") else "local_dir"
|
||||||
|
|
||||||
|
def _create_graph(self) -> BaseGraph:
|
||||||
def _create_graph(self):
|
|
||||||
"""
|
"""
|
||||||
Creates the graph of nodes representing the workflow for web scraping.
|
Creates the graph of nodes representing the workflow for web scraping.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
BaseGraph: A graph instance representing the web scraping workflow.
|
||||||
"""
|
"""
|
||||||
fetch_node = FetchNode(
|
fetch_node = FetchNode(
|
||||||
input="url | local_dir",
|
input="url | local_dir",
|
||||||
@ -81,8 +104,12 @@ class SmartScraperGraph(AbstractGraph):
|
|||||||
|
|
||||||
def run(self) -> str:
|
def run(self) -> str:
|
||||||
"""
|
"""
|
||||||
Executes the web scraping process and returns the answer to the prompt.
|
Executes the scraping process and returns the answer to the prompt.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The answer to the prompt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
||||||
self.final_state, self.execution_info = self.graph.execute(inputs)
|
self.final_state, self.execution_info = self.graph.execute(inputs)
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module for converting text to speach
|
SpeechGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
|
from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
|
||||||
from ..models import OpenAITextToSpeech
|
from ..models import OpenAITextToSpeech
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
@ -16,22 +17,43 @@ from .abstract_graph import AbstractGraph
|
|||||||
|
|
||||||
class SpeechGraph(AbstractGraph):
|
class SpeechGraph(AbstractGraph):
|
||||||
"""
|
"""
|
||||||
SpeechSummaryGraph is a tool that automates the process of extracting and summarizing
|
SpeechGraph is a scraping pipeline that scrapes the web, provides an answer to a given prompt, and generates an audio file.
|
||||||
information from web pages, then converting that summary into spoken word via an MP3 file.
|
|
||||||
|
Attributes:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
|
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
||||||
|
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||||
|
headless (bool): A flag indicating whether to run the graph in headless mode.
|
||||||
|
model_token (int): The token limit for the language model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> speech_graph = SpeechGraph(
|
||||||
|
... "List me all the attractions in Chioggia and generate an audio summary.",
|
||||||
|
... "https://en.wikipedia.org/wiki/Chioggia",
|
||||||
|
... {"llm": {"model": "gpt-3.5-turbo"}}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict):
|
||||||
"""
|
|
||||||
Initializes the SmartScraperGraph with a prompt, source, and configuration.
|
|
||||||
"""
|
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source)
|
||||||
|
|
||||||
self.input_key = "url" if source.startswith("http") else "local_dir"
|
self.input_key = "url" if source.startswith("http") else "local_dir"
|
||||||
|
|
||||||
def _create_graph(self):
|
def _create_graph(self) -> BaseGraph:
|
||||||
"""
|
"""
|
||||||
Creates the graph of nodes representing the workflow for web scraping and summarization.
|
Creates the graph of nodes representing the workflow for web scraping and audio generation.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
BaseGraph: A graph instance representing the web scraping and audio generation workflow.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
fetch_node = FetchNode(
|
fetch_node = FetchNode(
|
||||||
input="url | local_dir",
|
input="url | local_dir",
|
||||||
output=["doc"],
|
output=["doc"],
|
||||||
@ -93,8 +115,12 @@ class SpeechGraph(AbstractGraph):
|
|||||||
|
|
||||||
def run(self) -> str:
|
def run(self) -> str:
|
||||||
"""
|
"""
|
||||||
Executes the web scraping, summarization, and text-to-speech process.
|
Executes the scraping process and returns the answer to the prompt.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The answer to the prompt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
||||||
self.final_state, self.execution_info = self.graph.execute(inputs)
|
self.final_state, self.execution_info = self.graph.execute(inputs)
|
||||||
|
|
||||||
@ -105,4 +131,4 @@ class SpeechGraph(AbstractGraph):
|
|||||||
"output_path", "output.mp3"))
|
"output_path", "output.mp3"))
|
||||||
print(f"Audio saved to {self.config.get('output_path', 'output.mp3')}")
|
print(f"Audio saved to {self.config.get('output_path', 'output.mp3')}")
|
||||||
|
|
||||||
return self.final_state
|
return self.final_state.get("answer", "No answer found.")
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module for creating the smart scraper
|
XMLScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
@ -13,22 +14,46 @@ from .abstract_graph import AbstractGraph
|
|||||||
|
|
||||||
class XMLScraperGraph(AbstractGraph):
|
class XMLScraperGraph(AbstractGraph):
|
||||||
"""
|
"""
|
||||||
SmartScraper is a comprehensive web scraping tool that automates the process of extracting
|
XMLScraperGraph is a scraping pipeline that extracts information from XML files using a natural
|
||||||
information from web pages using a natural language model to interpret and answer prompts.
|
language model to interpret and answer prompts.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
|
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
||||||
|
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||||
|
headless (bool): A flag indicating whether to run the graph in headless mode.
|
||||||
|
model_token (int): The token limit for the language model.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt (str): The prompt for the graph.
|
||||||
|
source (str): The source of the graph.
|
||||||
|
config (dict): Configuration parameters for the graph.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> xml_scraper = XMLScraperGraph(
|
||||||
|
... "List me all the attractions in Chioggia.",
|
||||||
|
... "data/chioggia.xml",
|
||||||
|
... {"llm": {"model": "gpt-3.5-turbo"}}
|
||||||
|
... )
|
||||||
|
>>> result = xml_scraper.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict):
|
||||||
"""
|
|
||||||
Initializes the XmlScraperGraph with a prompt, source, and configuration.
|
|
||||||
"""
|
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source)
|
||||||
|
|
||||||
self.input_key = "xml" if source.endswith("xml") else "xml_dir"
|
self.input_key = "xml" if source.endswith("xml") else "xml_dir"
|
||||||
|
|
||||||
def _create_graph(self):
|
def _create_graph(self) -> BaseGraph:
|
||||||
"""
|
"""
|
||||||
Creates the graph of nodes representing the workflow for web scraping.
|
Creates the graph of nodes representing the workflow for web scraping.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
BaseGraph: A graph instance representing the web scraping workflow.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
fetch_node = FetchNode(
|
fetch_node = FetchNode(
|
||||||
input="xml_dir",
|
input="xml_dir",
|
||||||
output=["doc"],
|
output=["doc"],
|
||||||
@ -81,7 +106,11 @@ class XMLScraperGraph(AbstractGraph):
|
|||||||
def run(self) -> str:
|
def run(self) -> str:
|
||||||
"""
|
"""
|
||||||
Executes the web scraping process and returns the answer to the prompt.
|
Executes the web scraping process and returns the answer to the prompt.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: The answer to the prompt.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
|
||||||
self.final_state, self.execution_info = self.graph.execute(inputs)
|
self.final_state, self.execution_info = self.graph.execute(inputs)
|
||||||
|
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
__init__.py for the helpers folder
|
__init__.py for the helpers folder
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .nodes_metadata import nodes_metadata
|
from .nodes_metadata import nodes_metadata
|
||||||
from .schemas import graph_schema
|
from .schemas import graph_schema
|
||||||
from .models_tokens import models_tokens
|
from .models_tokens import models_tokens
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Models token
|
Models token
|
||||||
"""
|
"""
|
||||||
|
|
||||||
models_tokens = {
|
models_tokens = {
|
||||||
"openai": {
|
"openai": {
|
||||||
"gpt-3.5-turbo-0125": 16385,
|
"gpt-3.5-turbo-0125": 16385,
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
Module for mapping the models in ai agents
|
Module for mapping the models in ai agents
|
||||||
"""
|
"""
|
||||||
|
|
||||||
robots_dictionary = {
|
robots_dictionary = {
|
||||||
"gpt-3.5-turbo": ["GPTBot", "ChatGPT-user"],
|
"gpt-3.5-turbo": ["GPTBot", "ChatGPT-user"],
|
||||||
"gpt-4-turbo": ["GPTBot", "ChatGPT-user"],
|
"gpt-4-turbo": ["GPTBot", "ChatGPT-user"],
|
||||||
|
|||||||
@ -17,7 +17,18 @@ class BaseNode(ABC):
|
|||||||
output (List[str]): List of output keys to be updated in the state.
|
output (List[str]): List of output keys to be updated in the state.
|
||||||
min_input_len (int): Minimum required number of input keys.
|
min_input_len (int): Minimum required number of input keys.
|
||||||
node_config (Optional[dict]): Additional configuration for the node.
|
node_config (Optional[dict]): Additional configuration for the node.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
node_name (str): Name for identifying the node.
|
||||||
|
node_type (str): Type of the node; must be 'node' or 'conditional_node'.
|
||||||
|
input (str): Expression defining the input keys needed from the state.
|
||||||
|
output (List[str]): List of output keys to be updated in the state.
|
||||||
|
min_input_len (int, optional): Minimum required number of input keys; defaults to 1.
|
||||||
|
node_config (Optional[dict], optional): Additional configuration for the node; defaults to None.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If `node_type` is not one of the allowed types.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> class MyNode(BaseNode):
|
>>> class MyNode(BaseNode):
|
||||||
... def execute(self, state):
|
... def execute(self, state):
|
||||||
@ -31,20 +42,6 @@ class BaseNode(ABC):
|
|||||||
|
|
||||||
def __init__(self, node_name: str, node_type: str, input: str, output: List[str],
|
def __init__(self, node_name: str, node_type: str, input: str, output: List[str],
|
||||||
min_input_len: int = 1, node_config: Optional[dict] = None):
|
min_input_len: int = 1, node_config: Optional[dict] = None):
|
||||||
"""
|
|
||||||
Initialize the instance with the node's name, type, input/output specifications, and configuration details.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
node_name (str): Name for identifying the node.
|
|
||||||
node_type (str): Type of the node; must be 'node' or 'conditional_node'.
|
|
||||||
input (str): Expression defining the input keys needed from the state.
|
|
||||||
output (List[str]): List of output keys to be updated in the state.
|
|
||||||
min_input_len (int, optional): Minimum required number of input keys; defaults to 1.
|
|
||||||
node_config (Optional[dict], optional): Additional configuration for the node; defaults to None.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
ValueError: If `node_type` is not one of the allowed types.
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.node_name = node_name
|
self.node_name = node_name
|
||||||
self.input = input
|
self.input = input
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user