mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-25 21:11:11 +08:00
feat: fixed custom_graphs example and robots_node
This commit is contained in:
parent
8c5397f67a
commit
84fcb44aaa
@ -4,6 +4,8 @@ Example of custom graph using existing nodes
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from langchain_openai import OpenAIEmbeddings
|
||||||
from scrapegraphai.models import OpenAI
|
from scrapegraphai.models import OpenAI
|
||||||
from scrapegraphai.graphs import BaseGraph
|
from scrapegraphai.graphs import BaseGraph
|
||||||
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
|
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
|
||||||
@ -20,7 +22,7 @@ graph_config = {
|
|||||||
"api_key": openai_key,
|
"api_key": openai_key,
|
||||||
"model": "gpt-3.5-turbo",
|
"model": "gpt-3.5-turbo",
|
||||||
"temperature": 0,
|
"temperature": 0,
|
||||||
"streaming": True
|
"streaming": False
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -29,33 +31,50 @@ graph_config = {
|
|||||||
# ************************************************
|
# ************************************************
|
||||||
|
|
||||||
llm_model = OpenAI(graph_config["llm"])
|
llm_model = OpenAI(graph_config["llm"])
|
||||||
|
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
|
||||||
|
|
||||||
# define the nodes for the graph
|
# define the nodes for the graph
|
||||||
robot_node = RobotsNode(
|
robot_node = RobotsNode(
|
||||||
input="url",
|
input="url",
|
||||||
output=["is_scrapable"],
|
output=["is_scrapable"],
|
||||||
node_config={"llm_model": llm_model}
|
node_config={
|
||||||
|
"llm_model": llm_model,
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
fetch_node = FetchNode(
|
fetch_node = FetchNode(
|
||||||
input="url | local_dir",
|
input="url | local_dir",
|
||||||
output=["doc"],
|
output=["doc"],
|
||||||
node_config={"headless": True, "verbose": True}
|
node_config={
|
||||||
|
"verbose": True,
|
||||||
|
"headless": True,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
parse_node = ParseNode(
|
parse_node = ParseNode(
|
||||||
input="doc",
|
input="doc",
|
||||||
output=["parsed_doc"],
|
output=["parsed_doc"],
|
||||||
node_config={"chunk_size": 4096}
|
node_config={
|
||||||
|
"chunk_size": 4096,
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
rag_node = RAGNode(
|
rag_node = RAGNode(
|
||||||
input="user_prompt & (parsed_doc | doc)",
|
input="user_prompt & (parsed_doc | doc)",
|
||||||
output=["relevant_chunks"],
|
output=["relevant_chunks"],
|
||||||
node_config={"llm_model": llm_model},
|
node_config={
|
||||||
|
"llm_model": llm_model,
|
||||||
|
"embedder_model": embedder,
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
generate_answer_node = GenerateAnswerNode(
|
generate_answer_node = GenerateAnswerNode(
|
||||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={"llm_model": llm_model},
|
node_config={
|
||||||
|
"llm_model": llm_model,
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# ************************************************
|
# ************************************************
|
||||||
|
|||||||
@ -21,7 +21,7 @@ graph_config = {
|
|||||||
"api_key": openai_key,
|
"api_key": openai_key,
|
||||||
"model": "gpt-3.5-turbo",
|
"model": "gpt-3.5-turbo",
|
||||||
},
|
},
|
||||||
"verbose": True,
|
"verbose": False,
|
||||||
}
|
}
|
||||||
|
|
||||||
# ************************************************
|
# ************************************************
|
||||||
|
|||||||
@ -56,7 +56,7 @@ class AbstractGraph(ABC):
|
|||||||
self.execution_info = None
|
self.execution_info = None
|
||||||
|
|
||||||
# Set common configuration parameters
|
# Set common configuration parameters
|
||||||
self.verbose = True if config is None else config.get("verbose", False)
|
self.verbose = False if config is None else config.get("verbose", False)
|
||||||
self.headless = True if config is None else config.get(
|
self.headless = True if config is None else config.get(
|
||||||
"headless", True)
|
"headless", True)
|
||||||
common_params = {"headless": self.headless,
|
common_params = {"headless": self.headless,
|
||||||
|
|||||||
105
scrapegraphai/nodes/graphs_iterator_node.py
Normal file
105
scrapegraphai/nodes/graphs_iterator_node.py
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
"""
|
||||||
|
Example of custom graph using existing nodes
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from langchain_openai import OpenAIEmbeddings
|
||||||
|
from scrapegraphai.models import OpenAI
|
||||||
|
from scrapegraphai.graphs import BaseGraph
|
||||||
|
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, SearchInternetNode
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
openai_key = os.getenv("OPENAI_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": openai_key,
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the graph nodes
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
llm_model = OpenAI(graph_config["llm"])
|
||||||
|
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
|
||||||
|
|
||||||
|
search_internet_node = SearchInternetNode(
|
||||||
|
input="user_prompt",
|
||||||
|
output=["url"],
|
||||||
|
node_config={
|
||||||
|
"llm_model": llm_model
|
||||||
|
}
|
||||||
|
)
|
||||||
|
fetch_node = FetchNode(
|
||||||
|
input="url | local_dir",
|
||||||
|
output=["doc"],
|
||||||
|
node_config={
|
||||||
|
"verbose": True,
|
||||||
|
"headless": True,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
parse_node = ParseNode(
|
||||||
|
input="doc",
|
||||||
|
output=["parsed_doc"],
|
||||||
|
node_config={
|
||||||
|
"chunk_size": 4096,
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
rag_node = RAGNode(
|
||||||
|
input="user_prompt & (parsed_doc | doc)",
|
||||||
|
output=["relevant_chunks"],
|
||||||
|
node_config={
|
||||||
|
"llm_model": llm_model,
|
||||||
|
"embedder_model": embedder,
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
generate_answer_node = GenerateAnswerNode(
|
||||||
|
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||||
|
output=["answer"],
|
||||||
|
node_config={
|
||||||
|
"llm_model": llm_model,
|
||||||
|
"verbose": True,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the graph by defining the connections
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph = BaseGraph(
|
||||||
|
nodes=[
|
||||||
|
search_internet_node,
|
||||||
|
fetch_node,
|
||||||
|
parse_node,
|
||||||
|
rag_node,
|
||||||
|
generate_answer_node,
|
||||||
|
],
|
||||||
|
edges=[
|
||||||
|
(search_internet_node, fetch_node),
|
||||||
|
(fetch_node, parse_node),
|
||||||
|
(parse_node, rag_node),
|
||||||
|
(rag_node, generate_answer_node)
|
||||||
|
],
|
||||||
|
entry_point=search_internet_node
|
||||||
|
)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Execute the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
result, execution_info = graph.execute({
|
||||||
|
"user_prompt": "List me all the typical Chioggia dishes."
|
||||||
|
})
|
||||||
|
|
||||||
|
# get the answer from the result
|
||||||
|
result = result.get("answer", "No answer found.")
|
||||||
|
print(result)
|
||||||
@ -2,9 +2,9 @@
|
|||||||
RobotsNode Module
|
RobotsNode Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import List
|
from typing import List, Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from langchain_community.document_loaders import AsyncHtmlLoader
|
from langchain_community.document_loaders import AsyncChromiumLoader
|
||||||
from langchain.prompts import PromptTemplate
|
from langchain.prompts import PromptTemplate
|
||||||
from langchain.output_parsers import CommaSeparatedListOutputParser
|
from langchain.output_parsers import CommaSeparatedListOutputParser
|
||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
@ -34,7 +34,7 @@ class RobotsNode(BaseNode):
|
|||||||
node_name (str): The unique identifier name for the node, defaulting to "Robots".
|
node_name (str): The unique identifier name for the node, defaulting to "Robots".
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True,
|
def __init__(self, input: str, output: List[str], node_config: Optional[dict]=None, force_scraping=True,
|
||||||
node_name: str = "Robots"):
|
node_name: str = "Robots"):
|
||||||
super().__init__(node_name, "node", input, output, 1)
|
super().__init__(node_name, "node", input, output, 1)
|
||||||
|
|
||||||
@ -93,11 +93,11 @@ class RobotsNode(BaseNode):
|
|||||||
else:
|
else:
|
||||||
parsed_url = urlparse(source)
|
parsed_url = urlparse(source)
|
||||||
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
|
||||||
loader = AsyncHtmlLoader(f"{base_url}/robots.txt")
|
loader = AsyncChromiumLoader(f"{base_url}/robots.txt")
|
||||||
document = loader.load()
|
document = loader.load()
|
||||||
if "ollama" in self.llm_model.model:
|
if "ollama" in self.llm_model.model_name:
|
||||||
self.llm_model.model = self.llm_model.model.split("/")[-1]
|
self.llm_model.model_name = self.llm_model.model_name.split("/")[-1]
|
||||||
model = self.llm_model.model.split("/")[-1]
|
model = self.llm_model.model_name.split("/")[-1]
|
||||||
|
|
||||||
else:
|
else:
|
||||||
model = self.llm_model.model_name
|
model = self.llm_model.model_name
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user