mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-25 21:11:11 +08:00
feat: fixed custom_graphs example and robots_node
This commit is contained in:
parent
8c5397f67a
commit
84fcb44aaa
@ -4,6 +4,8 @@ Example of custom graph using existing nodes
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from scrapegraphai.models import OpenAI
|
||||
from scrapegraphai.graphs import BaseGraph
|
||||
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
|
||||
@ -20,7 +22,7 @@ graph_config = {
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-3.5-turbo",
|
||||
"temperature": 0,
|
||||
- "streaming": True
|
||||
+ "streaming": False
|
||||
},
|
||||
}
|
||||
|
||||
@ -29,33 +31,50 @@ graph_config = {
|
||||
# ************************************************
|
||||
|
||||
llm_model = OpenAI(graph_config["llm"])
|
||||
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
|
||||
|
||||
# define the nodes for the graph
|
||||
robot_node = RobotsNode(
|
||||
input="url",
|
||||
output=["is_scrapable"],
|
||||
- node_config={"llm_model": llm_model}
|
||||
+ node_config={
|
||||
+ "llm_model": llm_model,
|
||||
+ "verbose": True,
|
||||
+ }
|
||||
)
|
||||
|
||||
fetch_node = FetchNode(
|
||||
input="url | local_dir",
|
||||
output=["doc"],
|
||||
- node_config={"headless": True, "verbose": True}
|
||||
+ node_config={
|
||||
+ "verbose": True,
|
||||
+ "headless": True,
|
||||
+ }
|
||||
)
|
||||
parse_node = ParseNode(
|
||||
input="doc",
|
||||
output=["parsed_doc"],
|
||||
- node_config={"chunk_size": 4096}
|
||||
+ node_config={
|
||||
+ "chunk_size": 4096,
|
||||
+ "verbose": True,
|
||||
+ }
|
||||
)
|
||||
rag_node = RAGNode(
|
||||
input="user_prompt & (parsed_doc | doc)",
|
||||
output=["relevant_chunks"],
|
||||
- node_config={"llm_model": llm_model},
|
||||
+ node_config={
|
||||
+ "llm_model": llm_model,
|
||||
+ "embedder_model": embedder,
|
||||
+ "verbose": True,
|
||||
+ }
|
||||
)
|
||||
generate_answer_node = GenerateAnswerNode(
|
||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||
output=["answer"],
|
||||
- node_config={"llm_model": llm_model},
|
||||
+ node_config={
|
||||
+ "llm_model": llm_model,
|
||||
+ "verbose": True,
|
||||
+ }
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
|
||||
@ -21,7 +21,7 @@ graph_config = {
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-3.5-turbo",
|
||||
},
|
||||
- "verbose": True,
|
||||
+ "verbose": False,
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
|
||||
@ -56,7 +56,7 @@ class AbstractGraph(ABC):
|
||||
self.execution_info = None
|
||||
|
||||
# Set common configuration parameters
|
||||
- self.verbose = True if config is None else config.get("verbose", False)
|
||||
+ self.verbose = False if config is None else config.get("verbose", False)
|
||||
self.headless = True if config is None else config.get(
|
||||
"headless", True)
|
||||
common_params = {"headless": self.headless,
|
||||
|
||||
105
scrapegraphai/nodes/graphs_iterator_node.py
Normal file
105
scrapegraphai/nodes/graphs_iterator_node.py
Normal file
@ -0,0 +1,105 @@
|
||||
"""
|
||||
Example of custom graph using existing nodes
|
||||
"""
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from scrapegraphai.models import OpenAI
|
||||
from scrapegraphai.graphs import BaseGraph
|
||||
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, SearchInternetNode
|
||||
load_dotenv()
|
||||
|
||||
# ************************************************
|
||||
# Define the configuration for the graph
|
||||
# ************************************************
|
||||
|
||||
openai_key = os.getenv("OPENAI_APIKEY")
|
||||
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-3.5-turbo",
|
||||
},
|
||||
}
|
||||
|
||||
# ************************************************
|
||||
# Define the graph nodes
|
||||
# ************************************************
|
||||
|
||||
llm_model = OpenAI(graph_config["llm"])
|
||||
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
|
||||
|
||||
search_internet_node = SearchInternetNode(
|
||||
input="user_prompt",
|
||||
output=["url"],
|
||||
node_config={
|
||||
"llm_model": llm_model
|
||||
}
|
||||
)
|
||||
fetch_node = FetchNode(
|
||||
input="url | local_dir",
|
||||
output=["doc"],
|
||||
node_config={
|
||||
"verbose": True,
|
||||
"headless": True,
|
||||
}
|
||||
)
|
||||
parse_node = ParseNode(
|
||||
input="doc",
|
||||
output=["parsed_doc"],
|
||||
node_config={
|
||||
"chunk_size": 4096,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
rag_node = RAGNode(
|
||||
input="user_prompt & (parsed_doc | doc)",
|
||||
output=["relevant_chunks"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"embedder_model": embedder,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
generate_answer_node = GenerateAnswerNode(
|
||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||
output=["answer"],
|
||||
node_config={
|
||||
"llm_model": llm_model,
|
||||
"verbose": True,
|
||||
}
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Create the graph by defining the connections
|
||||
# ************************************************
|
||||
|
||||
graph = BaseGraph(
|
||||
nodes=[
|
||||
search_internet_node,
|
||||
fetch_node,
|
||||
parse_node,
|
||||
rag_node,
|
||||
generate_answer_node,
|
||||
],
|
||||
edges=[
|
||||
(search_internet_node, fetch_node),
|
||||
(fetch_node, parse_node),
|
||||
(parse_node, rag_node),
|
||||
(rag_node, generate_answer_node)
|
||||
],
|
||||
entry_point=search_internet_node
|
||||
)
|
||||
|
||||
# ************************************************
|
||||
# Execute the graph
|
||||
# ************************************************
|
||||
|
||||
result, execution_info = graph.execute({
|
||||
"user_prompt": "List me all the typical Chioggia dishes."
|
||||
})
|
||||
|
||||
# get the answer from the result
|
||||
result = result.get("answer", "No answer found.")
|
||||
print(result)
|
||||
@ -2,9 +2,9 @@
|
||||
RobotsNode Module
|
||||
"""
|
||||
|
||||
- from typing import List
|
||||
+ from typing import List, Optional
|
||||
from urllib.parse import urlparse
|
||||
from langchain_community.document_loaders import AsyncHtmlLoader
|
||||
from langchain_community.document_loaders import AsyncChromiumLoader
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.output_parsers import CommaSeparatedListOutputParser
|
||||
from .base_node import BaseNode
|
||||
@ -34,7 +34,7 @@ class RobotsNode(BaseNode):
|
||||
node_name (str): The unique identifier name for the node, defaulting to "Robots".
|
||||
"""
|
||||
|
||||
- def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True,
|
||||
+ def __init__(self, input: str, output: List[str], node_config: Optional[dict]=None, force_scraping=True,
|
||||
node_name: str = "Robots"):
|
||||
super().__init__(node_name, "node", input, output, 1)
|
||||
|
||||
@ -93,11 +93,11 @@ class RobotsNode(BaseNode):
|
||||
else:
|
||||
parsed_url = urlparse(source)
|
||||
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
||||
- loader = AsyncHtmlLoader(f"{base_url}/robots.txt")
|
||||
+ loader = AsyncChromiumLoader(f"{base_url}/robots.txt")
|
||||
document = loader.load()
|
||||
- if "ollama" in self.llm_model.model:
|
||||
- self.llm_model.model = self.llm_model.model.split("/")[-1]
|
||||
- model = self.llm_model.model.split("/")[-1]
|
||||
+ if "ollama" in self.llm_model.model_name:
|
||||
+ self.llm_model.model_name = self.llm_model.model_name.split("/")[-1]
|
||||
+ model = self.llm_model.model_name.split("/")[-1]
|
||||
|
||||
else:
|
||||
model = self.llm_model.model_name
|
||||
|
||||
Loading…
Reference in New Issue
Block a user