feat: fixed custom_graphs example and robots_node

This commit is contained in:
Marco Perini 2024-05-05 22:02:24 +02:00
parent 8c5397f67a
commit 84fcb44aaa
5 changed files with 139 additions and 15 deletions

View File

@ -4,6 +4,8 @@ Example of custom graph using existing nodes
import os
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
@ -20,7 +22,7 @@ graph_config = {
"api_key": openai_key,
"model": "gpt-3.5-turbo",
"temperature": 0,
"streaming": True
"streaming": False
},
}
@ -29,33 +31,50 @@ graph_config = {
# ************************************************
llm_model = OpenAI(graph_config["llm"])
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
# define the nodes for the graph
robot_node = RobotsNode(
input="url",
output=["is_scrapable"],
node_config={"llm_model": llm_model}
node_config={
"llm_model": llm_model,
"verbose": True,
}
)
fetch_node = FetchNode(
input="url | local_dir",
output=["doc"],
node_config={"headless": True, "verbose": True}
node_config={
"verbose": True,
"headless": True,
}
)
parse_node = ParseNode(
input="doc",
output=["parsed_doc"],
node_config={"chunk_size": 4096}
node_config={
"chunk_size": 4096,
"verbose": True,
}
)
rag_node = RAGNode(
input="user_prompt & (parsed_doc | doc)",
output=["relevant_chunks"],
node_config={"llm_model": llm_model},
node_config={
"llm_model": llm_model,
"embedder_model": embedder,
"verbose": True,
}
)
generate_answer_node = GenerateAnswerNode(
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
output=["answer"],
node_config={"llm_model": llm_model},
node_config={
"llm_model": llm_model,
"verbose": True,
}
)
# ************************************************

View File

@ -21,7 +21,7 @@ graph_config = {
"api_key": openai_key,
"model": "gpt-3.5-turbo",
},
"verbose": True,
"verbose": False,
}
# ************************************************

View File

@ -56,7 +56,7 @@ class AbstractGraph(ABC):
self.execution_info = None
# Set common configuration parameters
self.verbose = True if config is None else config.get("verbose", False)
self.verbose = False if config is None else config.get("verbose", False)
self.headless = True if config is None else config.get(
"headless", True)
common_params = {"headless": self.headless,

View File

@ -0,0 +1,105 @@
"""
Example of custom graph using existing nodes
"""
import os
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, SearchInternetNode
# Pull variables from a local .env file into the process environment
# before anything reads them.
load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

# The key is read from the OPENAI_APIKEY environment variable; os.getenv
# yields None when that variable is not set.
openai_key = os.getenv("OPENAI_APIKEY")

# Settings handed to the LLM wrapper when the models are built.
llm_settings = {"api_key": openai_key, "model": "gpt-3.5-turbo"}

graph_config = {"llm": llm_settings}
# ************************************************
# Build the models and the nodes of the graph
# ************************************************

# The chat model is created from the "llm" section of the configuration;
# the embedder reuses the API key exposed by that model.
llm_model = OpenAI(graph_config["llm"])
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)

# Every node is given an `input` expression, a list of `output` names and a
# `node_config` dict. NOTE(review): the meaning of the `|` and `&` operators
# in the input expressions is defined by the library -- confirm in its docs.

# user_prompt -> url
search_internet_node = SearchInternetNode(
    input="user_prompt",
    output=["url"],
    node_config={"llm_model": llm_model},
)

# url | local_dir -> doc
fetch_node = FetchNode(
    input="url | local_dir",
    output=["doc"],
    node_config={"verbose": True, "headless": True},
)

# doc -> parsed_doc
parse_node = ParseNode(
    input="doc",
    output=["parsed_doc"],
    node_config={"chunk_size": 4096, "verbose": True},
)

# user_prompt & (parsed_doc | doc) -> relevant_chunks
rag_node = RAGNode(
    input="user_prompt & (parsed_doc | doc)",
    output=["relevant_chunks"],
    node_config={"llm_model": llm_model, "embedder_model": embedder, "verbose": True},
)

# user_prompt & (relevant_chunks | parsed_doc | doc) -> answer
generate_answer_node = GenerateAnswerNode(
    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
    output=["answer"],
    node_config={"llm_model": llm_model, "verbose": True},
)
# ************************************************
# Wire the nodes together into a graph
# ************************************************

# The graph is a straight chain, so the node order alone determines the edges.
pipeline = (
    search_internet_node,
    fetch_node,
    parse_node,
    rag_node,
    generate_answer_node,
)

graph = BaseGraph(
    nodes=list(pipeline),
    # Pair each node with its successor: (n0, n1), (n1, n2), ...
    edges=list(zip(pipeline, pipeline[1:])),
    entry_point=pipeline[0],
)
# ************************************************
# Run the graph and show the answer
# ************************************************

# Initial input handed to the graph: only the user's question.
initial_state = {"user_prompt": "List me all the typical Chioggia dishes."}

# execute() returns a pair, unpacked here as (result, execution_info).
result, execution_info = graph.execute(initial_state)

# Keep only the "answer" entry, falling back to a fixed message if absent.
result = result.get("answer", "No answer found.")
print(result)

View File

@ -2,9 +2,9 @@
RobotsNode Module
"""
from typing import List
from typing import List, Optional
from urllib.parse import urlparse
from langchain_community.document_loaders import AsyncHtmlLoader
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser
from .base_node import BaseNode
@ -34,7 +34,7 @@ class RobotsNode(BaseNode):
node_name (str): The unique identifier name for the node, defaulting to "Robots".
"""
def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True,
def __init__(self, input: str, output: List[str], node_config: Optional[dict]=None, force_scraping=True,
node_name: str = "Robots"):
super().__init__(node_name, "node", input, output, 1)
@ -93,11 +93,11 @@ class RobotsNode(BaseNode):
else:
parsed_url = urlparse(source)
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
loader = AsyncHtmlLoader(f"{base_url}/robots.txt")
loader = AsyncChromiumLoader(f"{base_url}/robots.txt")
document = loader.load()
if "ollama" in self.llm_model.model:
self.llm_model.model = self.llm_model.model.split("/")[-1]
model = self.llm_model.model.split("/")[-1]
if "ollama" in self.llm_model.model_name:
self.llm_model.model_name = self.llm_model.model_name.split("/")[-1]
model = self.llm_model.model_name.split("/")[-1]
else:
model = self.llm_model.model_name