feat: fixed custom_graphs example and robots_node

This commit is contained in:
Marco Perini 2024-05-05 22:02:24 +02:00
parent 8c5397f67a
commit 84fcb44aaa
5 changed files with 139 additions and 15 deletions

View File

@ -4,6 +4,8 @@ Example of custom graph using existing nodes
import os import os
from dotenv import load_dotenv from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from scrapegraphai.models import OpenAI from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
@ -20,7 +22,7 @@ graph_config = {
"api_key": openai_key, "api_key": openai_key,
"model": "gpt-3.5-turbo", "model": "gpt-3.5-turbo",
"temperature": 0, "temperature": 0,
"streaming": True "streaming": False
}, },
} }
@ -29,33 +31,50 @@ graph_config = {
# ************************************************ # ************************************************
llm_model = OpenAI(graph_config["llm"]) llm_model = OpenAI(graph_config["llm"])
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
# define the nodes for the graph # define the nodes for the graph
robot_node = RobotsNode( robot_node = RobotsNode(
input="url", input="url",
output=["is_scrapable"], output=["is_scrapable"],
node_config={"llm_model": llm_model} node_config={
"llm_model": llm_model,
"verbose": True,
}
) )
fetch_node = FetchNode( fetch_node = FetchNode(
input="url | local_dir", input="url | local_dir",
output=["doc"], output=["doc"],
node_config={"headless": True, "verbose": True} node_config={
"verbose": True,
"headless": True,
}
) )
parse_node = ParseNode( parse_node = ParseNode(
input="doc", input="doc",
output=["parsed_doc"], output=["parsed_doc"],
node_config={"chunk_size": 4096} node_config={
"chunk_size": 4096,
"verbose": True,
}
) )
rag_node = RAGNode( rag_node = RAGNode(
input="user_prompt & (parsed_doc | doc)", input="user_prompt & (parsed_doc | doc)",
output=["relevant_chunks"], output=["relevant_chunks"],
node_config={"llm_model": llm_model}, node_config={
"llm_model": llm_model,
"embedder_model": embedder,
"verbose": True,
}
) )
generate_answer_node = GenerateAnswerNode( generate_answer_node = GenerateAnswerNode(
input="user_prompt & (relevant_chunks | parsed_doc | doc)", input="user_prompt & (relevant_chunks | parsed_doc | doc)",
output=["answer"], output=["answer"],
node_config={"llm_model": llm_model}, node_config={
"llm_model": llm_model,
"verbose": True,
}
) )
# ************************************************ # ************************************************

View File

@ -21,7 +21,7 @@ graph_config = {
"api_key": openai_key, "api_key": openai_key,
"model": "gpt-3.5-turbo", "model": "gpt-3.5-turbo",
}, },
"verbose": True, "verbose": False,
} }
# ************************************************ # ************************************************

View File

@ -56,7 +56,7 @@ class AbstractGraph(ABC):
self.execution_info = None self.execution_info = None
# Set common configuration parameters # Set common configuration parameters
self.verbose = True if config is None else config.get("verbose", False) self.verbose = False if config is None else config.get("verbose", False)
self.headless = True if config is None else config.get( self.headless = True if config is None else config.get(
"headless", True) "headless", True)
common_params = {"headless": self.headless, common_params = {"headless": self.headless,

View File

@ -0,0 +1,105 @@
"""
Example of custom graph using existing nodes
"""

import os

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings

from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import (
    FetchNode,
    ParseNode,
    RAGNode,
    GenerateAnswerNode,
    SearchInternetNode,
)

# Pull variables from a local .env file into the process environment
# before the API key is read below.
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-3.5-turbo",
    },
}

# ************************************************
# Define the graph nodes
# ************************************************

# The chat model is shared by every node that takes an "llm_model";
# the embedder reuses the key held by the chat model.
llm_model = OpenAI(graph_config["llm"])
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)

# user_prompt -> url
search_internet_node = SearchInternetNode(
    input="user_prompt",
    output=["url"],
    node_config={"llm_model": llm_model},
)

# url (or local_dir) -> doc
fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc"],
    node_config={"verbose": True, "headless": True},
)

# doc -> parsed_doc
parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={"chunk_size": 4096, "verbose": True},
)

# user_prompt + (parsed_doc or doc) -> relevant_chunks
rag_node = RAGNode(
    input="user_prompt & (parsed_doc | doc)",
    output=["relevant_chunks"],
    node_config={
        "llm_model": llm_model,
        "embedder_model": embedder,
        "verbose": True,
    },
)

# user_prompt + (relevant_chunks, parsed_doc or doc) -> answer
generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={"llm_model": llm_model, "verbose": True},
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

# The graph is a straight pipeline, so each node is linked to the one
# that follows it in this ordered list.
pipeline = [
    search_internet_node,
    fetch_node,
    parse_node,
    rag_node,
    generate_answer_node,
]

graph = BaseGraph(
    nodes=pipeline,
    edges=list(zip(pipeline, pipeline[1:])),
    entry_point=search_internet_node,
)

# ************************************************
# Execute the graph
# ************************************************

final_state, execution_info = graph.execute(
    {"user_prompt": "List me all the typical Chioggia dishes."}
)

# get the answer from the result
result = final_state.get("answer", "No answer found.")
print(result)

View File

@ -2,9 +2,9 @@
RobotsNode Module RobotsNode Module
""" """
from typing import List from typing import List, Optional
from urllib.parse import urlparse from urllib.parse import urlparse
from langchain_community.document_loaders import AsyncHtmlLoader from langchain_community.document_loaders import AsyncChromiumLoader
from langchain.prompts import PromptTemplate from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser from langchain.output_parsers import CommaSeparatedListOutputParser
from .base_node import BaseNode from .base_node import BaseNode
@ -34,7 +34,7 @@ class RobotsNode(BaseNode):
node_name (str): The unique identifier name for the node, defaulting to "Robots". node_name (str): The unique identifier name for the node, defaulting to "Robots".
""" """
def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True, def __init__(self, input: str, output: List[str], node_config: Optional[dict]=None, force_scraping=True,
node_name: str = "Robots"): node_name: str = "Robots"):
super().__init__(node_name, "node", input, output, 1) super().__init__(node_name, "node", input, output, 1)
@ -93,11 +93,11 @@ class RobotsNode(BaseNode):
else: else:
parsed_url = urlparse(source) parsed_url = urlparse(source)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
loader = AsyncHtmlLoader(f"{base_url}/robots.txt") loader = AsyncChromiumLoader(f"{base_url}/robots.txt")
document = loader.load() document = loader.load()
if "ollama" in self.llm_model.model: if "ollama" in self.llm_model.model_name:
self.llm_model.model = self.llm_model.model.split("/")[-1] self.llm_model.model_name = self.llm_model.model_name.split("/")[-1]
model = self.llm_model.model.split("/")[-1] model = self.llm_model.model_name.split("/")[-1]
else: else:
model = self.llm_model.model_name model = self.llm_model.model_name