diff --git a/scrapegraphai/graphs/base_graph.py b/scrapegraphai/graphs/base_graph.py index 18a16ba3..0b11ffa4 100644 --- a/scrapegraphai/graphs/base_graph.py +++ b/scrapegraphai/graphs/base_graph.py @@ -56,13 +56,11 @@ class BaseGraph: self.callback_manager = CustomLLMCallbackManager() if nodes[0].node_name != entry_point.node_name: - # raise a warning if the entry point is not the first node in the list warnings.warn( "Careful! The entry point node is different from the first node in the graph.") self._set_conditional_node_edges() - # Burr configuration self.use_burr = use_burr self.burr_config = burr_config or {} @@ -91,7 +89,8 @@ class BaseGraph: if node.node_type == 'conditional_node': outgoing_edges = [(from_node, to_node) for from_node, to_node in self.raw_edges if from_node.node_name == node.node_name] if len(outgoing_edges) != 2: - raise ValueError(f"ConditionalNode '{node.node_name}' must have exactly two outgoing edges.") + raise ValueError(f"""ConditionalNode '{node.node_name}' + must have exactly two outgoing edges.""") node.true_node_name = outgoing_edges[0][1].node_name try: node.false_node_name = outgoing_edges[1][1].node_name @@ -151,14 +150,14 @@ class BaseGraph: """Extracts schema information from the node configuration.""" if not hasattr(current_node, "node_config"): return None - + if not isinstance(current_node.node_config, dict): return None - + schema_config = current_node.node_config.get("schema") if not schema_config or isinstance(schema_config, dict): return None - + try: return schema_config.schema() except Exception: @@ -167,7 +166,7 @@ class BaseGraph: def _execute_node(self, current_node, state, llm_model, llm_model_name): """Executes a single node and returns execution information.""" curr_time = time.time() - + with self.callback_manager.exclusive_get_callback(llm_model, llm_model_name) as cb: result = current_node.execute(state) node_exec_time = time.time() - curr_time @@ -197,17 +196,17 @@ class BaseGraph: raise ValueError( f"Conditional Node returned a node name '{result}' that does not exist in the graph" ) - + return self.edges.get(current_node.node_name) def _execute_standard(self, initial_state: dict) -> Tuple[dict, list]: """ - Executes the graph by traversing nodes starting from the entry point using the standard method. + Executes the graph by traversing nodes + starting from the entry point using the standard method. """ current_node_name = self.entry_point state = initial_state - - # Tracking variables + total_exec_time = 0.0 exec_info = [] cb_total = { @@ -230,16 +229,13 @@ class BaseGraph: while current_node_name: current_node = self._get_node_by_name(current_node_name) - - # Update source information if needed + if source_type is None: source_type, source, prompt = self._update_source_info(current_node, state) - - # Get model information if needed + if llm_model is None: llm_model, llm_model_name, embedder_model = self._get_model_info(current_node) - - # Get schema if needed + if schema is None: schema = self._get_schema(current_node) @@ -273,7 +269,6 @@ class BaseGraph: ) raise e - # Add total results to execution info exec_info.append({ "node_name": "TOTAL RESULT", "total_tokens": cb_total["total_tokens"], @@ -284,7 +279,6 @@ class BaseGraph: "exec_time": total_exec_time, }) - # Log final execution results graph_execution_time = time.time() - start_time response = state.get("answer", None) if source_type == "url" else None content = state.get("parsed_doc", None) if response is not None else None @@ -343,4 +337,3 @@ class BaseGraph: self.raw_edges.append((last_node, node)) self.nodes.append(node) self.edges = self._create_edges({e for e in self.raw_edges}) - diff --git a/scrapegraphai/graphs/code_generator_graph.py b/scrapegraphai/graphs/code_generator_graph.py index fe94e9d5..359b3b1a 100644 --- a/scrapegraphai/graphs/code_generator_graph.py +++ b/scrapegraphai/graphs/code_generator_graph.py @@ -17,7 +17,6 @@ from ..nodes import ( GenerateCodeNode, ) - class CodeGeneratorGraph(AbstractGraph): """ CodeGeneratorGraph is a script generator pipeline that generates diff --git a/scrapegraphai/graphs/csv_scraper_graph.py b/scrapegraphai/graphs/csv_scraper_graph.py index a4165a9d..071bc910 100644 --- a/scrapegraphai/graphs/csv_scraper_graph.py +++ b/scrapegraphai/graphs/csv_scraper_graph.py @@ -59,7 +59,7 @@ class CSVScraperGraph(AbstractGraph): """ Creates the graph of nodes representing the workflow for web scraping. """ - + fetch_node = FetchNode( input="csv | csv_dir", output=["doc"], diff --git a/scrapegraphai/graphs/depth_search_graph.py b/scrapegraphai/graphs/depth_search_graph.py index 0df9c061..92e54de0 100644 --- a/scrapegraphai/graphs/depth_search_graph.py +++ b/scrapegraphai/graphs/depth_search_graph.py @@ -15,7 +15,6 @@ from ..nodes import ( GenerateAnswerNodeKLevel, ) - class DepthSearchGraph(AbstractGraph): """ CodeGeneratorGraph is a script generator pipeline that generates diff --git a/scrapegraphai/graphs/document_scraper_graph.py b/scrapegraphai/graphs/document_scraper_graph.py index db3244c5..58c19ed3 100644 --- a/scrapegraphai/graphs/document_scraper_graph.py +++ b/scrapegraphai/graphs/document_scraper_graph.py @@ -9,7 +9,6 @@ from .base_graph import BaseGraph from .abstract_graph import AbstractGraph from ..nodes import FetchNode, ParseNode, GenerateAnswerNode - class DocumentScraperGraph(AbstractGraph): """ DocumentScraperGraph is a scraping pipeline that automates the process of diff --git a/scrapegraphai/graphs/omni_scraper_graph.py b/scrapegraphai/graphs/omni_scraper_graph.py index 035ad6a7..a7af6bf5 100644 --- a/scrapegraphai/graphs/omni_scraper_graph.py +++ b/scrapegraphai/graphs/omni_scraper_graph.py @@ -9,7 +9,6 @@ from .abstract_graph import AbstractGraph from ..nodes import FetchNode, ParseNode, ImageToTextNode, GenerateAnswerOmniNode from ..models import OpenAIImageToText - class OmniScraperGraph(AbstractGraph): """ OmniScraper is a scraping pipeline that automates the process of diff --git a/scrapegraphai/graphs/script_creator_graph.py b/scrapegraphai/graphs/script_creator_graph.py index 1e785c92..35c6d2ba 100644 --- a/scrapegraphai/graphs/script_creator_graph.py +++ b/scrapegraphai/graphs/script_creator_graph.py @@ -1,14 +1,12 @@ """ ScriptCreatorGraph Module """ - from typing import Optional from pydantic import BaseModel from .base_graph import BaseGraph from .abstract_graph import AbstractGraph from ..nodes import FetchNode, ParseNode, GenerateScraperNode - class ScriptCreatorGraph(AbstractGraph): """ ScriptCreatorGraph defines a scraping pipeline for generating web scraping scripts. diff --git a/scrapegraphai/graphs/search_graph.py b/scrapegraphai/graphs/search_graph.py index 313cb768..2fb4b949 100644 --- a/scrapegraphai/graphs/search_graph.py +++ b/scrapegraphai/graphs/search_graph.py @@ -1,7 +1,6 @@ """ SearchGraph Module """ - from copy import deepcopy from typing import Optional, List from pydantic import BaseModel diff --git a/scrapegraphai/graphs/search_link_graph.py b/scrapegraphai/graphs/search_link_graph.py index e8baf1d8..fa1b6f18 100644 --- a/scrapegraphai/graphs/search_link_graph.py +++ b/scrapegraphai/graphs/search_link_graph.py @@ -1,7 +1,6 @@ """ SearchLinkGraph Module """ - from typing import Optional import logging from pydantic import BaseModel @@ -9,7 +8,6 @@ from .base_graph import BaseGraph from .abstract_graph import AbstractGraph from ..nodes import FetchNode, SearchLinkNode, SearchLinksWithContext - class SearchLinkGraph(AbstractGraph): """ SearchLinkGraph is a scraping pipeline that automates the process of diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index cd9e75bf..404bdcd9 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -1,7 +1,6 @@ """ SmartScraperGraph Module """ - from typing import Optional from pydantic import BaseModel from scrapegraph_py import Client diff --git a/scrapegraphai/graphs/smart_scraper_lite_graph.py b/scrapegraphai/graphs/smart_scraper_lite_graph.py index b751a8c3..fbc8a087 100644 --- a/scrapegraphai/graphs/smart_scraper_lite_graph.py +++ b/scrapegraphai/graphs/smart_scraper_lite_graph.py @@ -1,7 +1,6 @@ """ SmartScraperGraph Module """ - from typing import Optional from pydantic import BaseModel from .base_graph import BaseGraph @@ -11,7 +10,6 @@ from ..nodes import ( ParseNode, ) - class SmartScraperLiteGraph(AbstractGraph): """ SmartScraperLiteGraph is a scraping pipeline that automates the process of diff --git a/scrapegraphai/graphs/speech_graph.py b/scrapegraphai/graphs/speech_graph.py index d9d107c0..8cec90d4 100644 --- a/scrapegraphai/graphs/speech_graph.py +++ b/scrapegraphai/graphs/speech_graph.py @@ -1,7 +1,6 @@ """ SpeechGraph Module """ - from typing import Optional from pydantic import BaseModel from .base_graph import BaseGraph @@ -15,7 +14,6 @@ from ..nodes import ( from ..utils.save_audio_from_bytes import save_audio_from_bytes from ..models import OpenAITextToSpeech - class SpeechGraph(AbstractGraph): """ SpeechyGraph is a scraping pipeline that scrapes the web, provide an answer diff --git a/scrapegraphai/nodes/fetch_node.py b/scrapegraphai/nodes/fetch_node.py index 88225a20..284868ff 100644 --- a/scrapegraphai/nodes/fetch_node.py +++ b/scrapegraphai/nodes/fetch_node.py @@ -1,7 +1,6 @@ """ FetchNode Module """ - import json from typing import List, Optional from langchain_openai import ChatOpenAI, AzureChatOpenAI @@ -15,7 +14,6 @@ from ..utils.convert_to_md import convert_to_md from ..utils.logging import get_logger from .base_node import BaseNode - class FetchNode(BaseNode): """ A node responsible for fetching the HTML content of a specified URL and updating diff --git a/scrapegraphai/nodes/fetch_node_level_k.py b/scrapegraphai/nodes/fetch_node_level_k.py index 3307f129..8be392aa 100644 --- a/scrapegraphai/nodes/fetch_node_level_k.py +++ b/scrapegraphai/nodes/fetch_node_level_k.py @@ -1,7 +1,6 @@ """ fetch_node_level_k module """ - from typing import List, Optional from urllib.parse import urljoin from langchain_core.documents import Document