diff --git a/examples/openai/custom_graph_openai.py b/examples/openai/custom_graph_openai.py index 5744669b..14dd99bd 100644 --- a/examples/openai/custom_graph_openai.py +++ b/examples/openai/custom_graph_openai.py @@ -4,6 +4,8 @@ Example of custom graph using existing nodes import os from dotenv import load_dotenv + +from langchain_openai import OpenAIEmbeddings from scrapegraphai.models import OpenAI from scrapegraphai.graphs import BaseGraph from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode @@ -20,7 +22,7 @@ graph_config = { "api_key": openai_key, "model": "gpt-3.5-turbo", "temperature": 0, - "streaming": True + "streaming": False }, } @@ -29,33 +31,50 @@ graph_config = { # ************************************************ llm_model = OpenAI(graph_config["llm"]) +embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) # define the nodes for the graph robot_node = RobotsNode( input="url", output=["is_scrapable"], - node_config={"llm_model": llm_model} + node_config={ + "llm_model": llm_model, + "verbose": True, + } ) fetch_node = FetchNode( input="url | local_dir", output=["doc"], - node_config={"headless": True, "verbose": True} + node_config={ + "verbose": True, + "headless": True, + } ) parse_node = ParseNode( input="doc", output=["parsed_doc"], - node_config={"chunk_size": 4096} + node_config={ + "chunk_size": 4096, + "verbose": True, + } ) rag_node = RAGNode( input="user_prompt & (parsed_doc | doc)", output=["relevant_chunks"], - node_config={"llm_model": llm_model}, + node_config={ + "llm_model": llm_model, + "embedder_model": embedder, + "verbose": True, + } ) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], - node_config={"llm_model": llm_model}, + node_config={ + "llm_model": llm_model, + "verbose": True, + } ) # ************************************************ diff --git a/examples/openai/smart_scraper_openai.py b/examples/openai/smart_scraper_openai.py index 32a1942b..610e6697 100644 --- a/examples/openai/smart_scraper_openai.py +++ b/examples/openai/smart_scraper_openai.py @@ -21,7 +21,7 @@ graph_config = { "api_key": openai_key, "model": "gpt-3.5-turbo", }, - "verbose": True, + "verbose": False, } # ************************************************ diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 650ed988..089b0f95 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -56,7 +56,7 @@ class AbstractGraph(ABC): self.execution_info = None # Set common configuration parameters - self.verbose = True if config is None else config.get("verbose", False) + self.verbose = False if config is None else config.get("verbose", False) self.headless = True if config is None else config.get( "headless", True) common_params = {"headless": self.headless, diff --git a/scrapegraphai/nodes/graphs_iterator_node.py b/scrapegraphai/nodes/graphs_iterator_node.py new file mode 100644 index 00000000..5728f062 --- /dev/null +++ b/scrapegraphai/nodes/graphs_iterator_node.py @@ -0,0 +1,105 @@ +""" +Example of custom graph using existing nodes +""" + +import os +from dotenv import load_dotenv +from langchain_openai import OpenAIEmbeddings +from scrapegraphai.models import OpenAI +from scrapegraphai.graphs import BaseGraph +from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, SearchInternetNode +load_dotenv() + +# ************************************************ +# Define the configuration for the graph +# ************************************************ + +openai_key = os.getenv("OPENAI_APIKEY") + +graph_config = { + "llm": { + "api_key": openai_key, + "model": "gpt-3.5-turbo", + }, +} + +# ************************************************ +# Define the graph nodes +# ************************************************ + +llm_model = OpenAI(graph_config["llm"]) +embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) + +search_internet_node = SearchInternetNode( + input="user_prompt", + output=["url"], + node_config={ + "llm_model": llm_model + } +) +fetch_node = FetchNode( + input="url | local_dir", + output=["doc"], + node_config={ + "verbose": True, + "headless": True, + } +) +parse_node = ParseNode( + input="doc", + output=["parsed_doc"], + node_config={ + "chunk_size": 4096, + "verbose": True, + } +) +rag_node = RAGNode( + input="user_prompt & (parsed_doc | doc)", + output=["relevant_chunks"], + node_config={ + "llm_model": llm_model, + "embedder_model": embedder, + "verbose": True, + } +) +generate_answer_node = GenerateAnswerNode( + input="user_prompt & (relevant_chunks | parsed_doc | doc)", + output=["answer"], + node_config={ + "llm_model": llm_model, + "verbose": True, + } +) + +# ************************************************ +# Create the graph by defining the connections +# ************************************************ + +graph = BaseGraph( + nodes=[ + search_internet_node, + fetch_node, + parse_node, + rag_node, + generate_answer_node, + ], + edges=[ + (search_internet_node, fetch_node), + (fetch_node, parse_node), + (parse_node, rag_node), + (rag_node, generate_answer_node) + ], + entry_point=search_internet_node +) + +# ************************************************ +# Execute the graph +# ************************************************ + +result, execution_info = graph.execute({ + "user_prompt": "List me all the typical Chioggia dishes." +}) + +# get the answer from the result +result = result.get("answer", "No answer found.") +print(result) diff --git a/scrapegraphai/nodes/robots_node.py b/scrapegraphai/nodes/robots_node.py index 8c341183..e56a95d1 100644 --- a/scrapegraphai/nodes/robots_node.py +++ b/scrapegraphai/nodes/robots_node.py @@ -2,9 +2,9 @@ RobotsNode Module """ -from typing import List +from typing import List, Optional from urllib.parse import urlparse -from langchain_community.document_loaders import AsyncHtmlLoader +from langchain_community.document_loaders import AsyncChromiumLoader from langchain.prompts import PromptTemplate from langchain.output_parsers import CommaSeparatedListOutputParser from .base_node import BaseNode @@ -34,7 +34,7 @@ class RobotsNode(BaseNode): node_name (str): The unique identifier name for the node, defaulting to "Robots". """ - def __init__(self, input: str, output: List[str], node_config: dict, force_scraping=True, + def __init__(self, input: str, output: List[str], node_config: Optional[dict]=None, force_scraping=True, node_name: str = "Robots"): super().__init__(node_name, "node", input, output, 1) @@ -93,11 +93,11 @@ class RobotsNode(BaseNode): else: parsed_url = urlparse(source) base_url = f"{parsed_url.scheme}://{parsed_url.netloc}" - loader = AsyncHtmlLoader(f"{base_url}/robots.txt") + loader = AsyncChromiumLoader(f"{base_url}/robots.txt") document = loader.load() - if "ollama" in self.llm_model.model: - self.llm_model.model = self.llm_model.model.split("/")[-1] - model = self.llm_model.model.split("/")[-1] + if "ollama" in self.llm_model.model_name: + self.llm_model.model_name = self.llm_model.model_name.split("/")[-1] + model = self.llm_model.model_name.split("/")[-1] else: model = self.llm_model.model_name