mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
feat: add logger integration
This commit is contained in:
parent
218b8ede8a
commit
e53766b16e
@ -1,4 +1,4 @@
|
||||
"""
|
||||
""""
|
||||
FetchNode Module
|
||||
"""
|
||||
|
||||
@ -13,7 +13,7 @@ from langchain_core.documents import Document
|
||||
from ..docloaders import ChromiumLoader
|
||||
from .base_node import BaseNode
|
||||
from ..utils.cleanup_html import cleanup_html
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
class FetchNode(BaseNode):
|
||||
"""
|
||||
@ -74,7 +74,8 @@ class FetchNode(BaseNode):
|
||||
necessary information to perform the operation is missing.
|
||||
"""
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("fetch node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
@ -128,7 +129,7 @@ class FetchNode(BaseNode):
|
||||
cleanedup_html = cleanup_html(response.text, source)
|
||||
compressed_document = [Document(page_content=cleanedup_html)]
|
||||
else:
|
||||
print(f"Failed to retrieve contents from the webpage at url: {source}")
|
||||
logger.warning(f"Failed to retrieve contents from the webpage at url: {source}")
|
||||
|
||||
else:
|
||||
loader_kwargs = {}
|
||||
@ -144,4 +145,4 @@ class FetchNode(BaseNode):
|
||||
]
|
||||
|
||||
state.update({self.output[0]: compressed_document})
|
||||
return state
|
||||
return state
|
||||
|
||||
@ -9,6 +9,7 @@ from tqdm import tqdm
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain_core.output_parsers import JsonOutputParser
|
||||
from langchain_core.runnables import RunnableParallel
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
# Imports from the library
|
||||
from .base_node import BaseNode
|
||||
@ -72,7 +73,8 @@ class GenerateAnswerCSVNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("generate_answer csv node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -10,6 +10,7 @@ from tqdm import tqdm
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain_core.output_parsers import JsonOutputParser
|
||||
from langchain_core.runnables import RunnableParallel
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
# Imports from the library
|
||||
from .base_node import BaseNode
|
||||
@ -59,7 +60,8 @@ class GenerateAnswerNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("generate answer node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -9,6 +9,7 @@ from tqdm import tqdm
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain_core.output_parsers import JsonOutputParser
|
||||
from langchain_core.runnables import RunnableParallel
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
# Imports from the library
|
||||
from .base_node import BaseNode
|
||||
@ -72,7 +73,8 @@ class GenerateAnswerPDFNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("generate answer pdf node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -10,6 +10,7 @@ from tqdm import tqdm
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.runnables import RunnableParallel
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
# Imports from the library
|
||||
from .base_node import BaseNode
|
||||
@ -63,7 +64,8 @@ class GenerateScraperNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("generate scraper node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -6,7 +6,7 @@ from typing import List, Optional
|
||||
from langchain.output_parsers import CommaSeparatedListOutputParser
|
||||
from langchain.prompts import PromptTemplate
|
||||
from .base_node import BaseNode
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
class GetProbableTagsNode(BaseNode):
|
||||
"""
|
||||
@ -25,11 +25,12 @@ class GetProbableTagsNode(BaseNode):
|
||||
node_name (str): The unique identifier name for the node, defaulting to "GetProbableTags".
|
||||
"""
|
||||
|
||||
def __init__(self, input: str, output: List[str], model_config: dict,
|
||||
def __init__(self, input: str, output: List[str], node_config: dict,
|
||||
node_name: str = "GetProbableTags"):
|
||||
super().__init__(node_name, "node", input, output, 2, model_config)
|
||||
super().__init__(node_name, "node", input, output, 2, node_config)
|
||||
|
||||
self.llm_model = model_config["llm_model"]
|
||||
self.llm_model = node_config["llm_model"]
|
||||
self.verbose = False if node_config is None else node_config.get("verbose", False)
|
||||
|
||||
def execute(self, state: dict) -> dict:
|
||||
"""
|
||||
@ -49,7 +50,9 @@ class GetProbableTagsNode(BaseNode):
|
||||
necessary information for generating tag predictions is missing.
|
||||
"""
|
||||
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
if self.verbose:
|
||||
logger = get_logger("get probable tags node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -5,7 +5,7 @@ GraphIterator Module
|
||||
import asyncio
|
||||
import copy
|
||||
from typing import List, Optional
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
from tqdm.asyncio import tqdm
|
||||
|
||||
from .base_node import BaseNode
|
||||
|
||||
@ -4,6 +4,7 @@ ImageToTextNode Module
|
||||
|
||||
from typing import List, Optional
|
||||
from .base_node import BaseNode
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
|
||||
class ImageToTextNode(BaseNode):
|
||||
@ -42,7 +43,8 @@ class ImageToTextNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print("---GENERATING TEXT FROM IMAGE---")
|
||||
logger = get_logger("image to text node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
input_keys = self.get_input_keys(state)
|
||||
input_data = [state[key] for key in input_keys]
|
||||
|
||||
@ -5,7 +5,7 @@ MergeAnswersNode Module
|
||||
# Imports from standard library
|
||||
from typing import List, Optional
|
||||
from tqdm import tqdm
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
# Imports from Langchain
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain_core.output_parsers import JsonOutputParser
|
||||
@ -54,7 +54,8 @@ class MergeAnswersNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("fetch node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -6,7 +6,7 @@ from typing import List, Optional
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.document_transformers import Html2TextTransformer
|
||||
from .base_node import BaseNode
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
class ParseNode(BaseNode):
|
||||
"""
|
||||
@ -49,7 +49,8 @@ class ParseNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("fetch node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -8,6 +8,7 @@ from langchain.retrievers import ContextualCompressionRetriever
|
||||
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
|
||||
from langchain_community.document_transformers import EmbeddingsRedundantFilter
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
from .base_node import BaseNode
|
||||
|
||||
@ -55,9 +56,10 @@ class RAGNode(BaseNode):
|
||||
KeyError: If the input keys are not found in the state, indicating that the
|
||||
necessary information for compressing the content is missing.
|
||||
"""
|
||||
logger = get_logger("rag node")
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
@ -80,7 +82,7 @@ class RAGNode(BaseNode):
|
||||
chunked_docs.append(doc)
|
||||
|
||||
if self.verbose:
|
||||
print("--- (updated chunks metadata) ---")
|
||||
logger.info("--- (updated chunks metadata) ---")
|
||||
|
||||
# check if embedder_model is provided, if not use llm_model
|
||||
self.embedder_model = self.embedder_model if self.embedder_model else self.llm_model
|
||||
|
||||
@ -9,7 +9,7 @@ from langchain.prompts import PromptTemplate
|
||||
from langchain.output_parsers import CommaSeparatedListOutputParser
|
||||
from .base_node import BaseNode
|
||||
from ..helpers import robots_dictionary
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
class RobotsNode(BaseNode):
|
||||
"""
|
||||
@ -61,9 +61,10 @@ class RobotsNode(BaseNode):
|
||||
ValueError: If the website is not scrapeable based on the robots.txt file and
|
||||
scraping is not enforced.
|
||||
"""
|
||||
logger = get_logger("robots node")
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
@ -121,17 +122,17 @@ class RobotsNode(BaseNode):
|
||||
|
||||
if "no" in is_scrapable:
|
||||
if self.verbose:
|
||||
print("\033[31m(Scraping this website is not allowed)\033[0m")
|
||||
logger.warning("\033[31m(Scraping this website is not allowed)\033[0m")
|
||||
|
||||
if not self.force_scraping:
|
||||
raise ValueError(
|
||||
'The website you selected is not scrapable')
|
||||
else:
|
||||
if self.verbose:
|
||||
print("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m")
|
||||
logger.warning("\033[33m(WARNING: Scraping this website is not allowed but you decided to force it)\033[0m")
|
||||
else:
|
||||
if self.verbose:
|
||||
print("\033[32m(Scraping this website is allowed)\033[0m")
|
||||
logger.warning("\033[32m(Scraping this website is allowed)\033[0m")
|
||||
|
||||
state.update({self.output[0]: is_scrapable})
|
||||
return state
|
||||
|
||||
@ -7,7 +7,7 @@ from langchain.output_parsers import CommaSeparatedListOutputParser
|
||||
from langchain.prompts import PromptTemplate
|
||||
from ..utils.research_web import search_on_web
|
||||
from .base_node import BaseNode
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
class SearchInternetNode(BaseNode):
|
||||
"""
|
||||
@ -54,9 +54,10 @@ class SearchInternetNode(BaseNode):
|
||||
KeyError: If the input keys are not found in the state, indicating that the
|
||||
necessary information for generating the answer is missing.
|
||||
"""
|
||||
logger = get_logger("search interne node")
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -88,7 +89,8 @@ class SearchInternetNode(BaseNode):
|
||||
search_query = search_answer.invoke({"user_prompt": user_prompt})[0]
|
||||
|
||||
if self.verbose:
|
||||
print(f"Search Query: {search_query}")
|
||||
logger.info(f"Search Query: {search_query}")
|
||||
|
||||
|
||||
answer = search_on_web(
|
||||
query=search_query, max_results=self.max_results)
|
||||
|
||||
@ -5,7 +5,7 @@ SearchLinkNode Module
|
||||
# Imports from standard library
|
||||
from typing import List, Optional
|
||||
from tqdm import tqdm
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
# Imports from Langchain
|
||||
from langchain.prompts import PromptTemplate
|
||||
@ -59,7 +59,8 @@ class SearchLinkNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("search link node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -4,7 +4,7 @@ TextToSpeechNode Module
|
||||
|
||||
from typing import List, Optional
|
||||
from .base_node import BaseNode
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
class TextToSpeechNode(BaseNode):
|
||||
"""
|
||||
@ -45,7 +45,8 @@ class TextToSpeechNode(BaseNode):
|
||||
"""
|
||||
|
||||
if self.verbose:
|
||||
print(f"--- Executing {self.node_name} Node ---")
|
||||
logger = get_logger("text to speach node")
|
||||
logger.info(f"--- Executing {self.node_name} Node ---")
|
||||
|
||||
# Interpret input keys based on the provided input expression
|
||||
input_keys = self.get_input_keys(state)
|
||||
|
||||
@ -9,3 +9,4 @@ from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers
|
||||
from .save_audio_from_bytes import save_audio_from_bytes
|
||||
from .sys_dynamic_import import dynamic_import, srcfile_import
|
||||
from .cleanup_html import cleanup_html
|
||||
from .logging import *
|
||||
137
scrapegraphai/utils/logging.py
Normal file
137
scrapegraphai/utils/logging.py
Normal file
@ -0,0 +1,137 @@
|
||||
"""A centralized logging system for any library
|
||||
|
||||
source code inspired by https://github.com/huggingface/transformers/blob/main/src/transformers/utils/logging.py
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
# Top-level package name: the text before the first dot of this module's
# dotted path. The library's root logger is registered under this name.
_library_name = __name__.partition(".")[0]

# Handler attached to the library root logger by `_set_library_root_logger`;
# stays None until that first-time configuration has run.
_default_handler = None
# Level applied to the library root logger when it is first configured.
_default_logging_level = logging.WARNING

# Lock guarding the one-time configuration of the library root logger.
_semaphore = threading.Lock()
|
||||
|
||||
|
||||
def _get_library_root_logger() -> logging.Logger:
    """Return the logger registered under the library's top-level package name."""
    root_logger = logging.getLogger(_library_name)
    return root_logger
|
||||
|
||||
|
||||
def _set_library_root_logger() -> None:
    """Configure the library root logger once; later calls are no-ops.

    On the first call a ``StreamHandler`` writing to ``sys.stderr`` is
    attached to the library root logger, the logger's level is set to
    ``_default_logging_level`` and propagation to ancestor loggers is
    switched off. The whole sequence runs under ``_semaphore`` so that
    concurrent first calls configure the logger only once.
    """
    global _default_handler

    with _semaphore:
        if _default_handler is not None:
            return

        # Some frozen/windowed environments run with ``sys.stderr`` set to None:
        # https://github.com/pyinstaller/pyinstaller/issues/7334#issuecomment-1357447176
        # ``StreamHandler()`` captures ``sys.stderr`` at construction time, so the
        # fallback stream must be installed *before* the handler is created;
        # otherwise the handler would hold ``None`` as its stream.
        if sys.stderr is None:
            sys.stderr = open(os.devnull, "w")

        _default_handler = logging.StreamHandler()  # sys.stderr as stream
        _default_handler.flush = sys.stderr.flush

        library_root_logger = _get_library_root_logger()
        library_root_logger.addHandler(_default_handler)
        library_root_logger.setLevel(_default_logging_level)
        library_root_logger.propagate = False
|
||||
|
||||
|
||||
def get_logger(name: str | None = None) -> logging.Logger:
    """Return a logger that belongs to the library's logger hierarchy.

    Args:
        name: Logger name. When omitted or empty, the library root logger is
            returned. A name that is not already the library name or a dotted
            child of it (for example ``"fetch node"``) is nested under the
            library root logger.

    Returns:
        The requested ``logging.Logger``.

    Nesting matters because the handler and level are installed on the library
    root logger only. A logger outside that hierarchy would never reach the
    library handler, and the ``set_verbosity*`` helpers would not affect it.
    """
    _set_library_root_logger()

    if not name:
        return logging.getLogger(_library_name)

    inside_library = name == _library_name or name.startswith(f"{_library_name}.")
    qualified_name = name if inside_library else f"{_library_name}.{name}"
    return logging.getLogger(qualified_name)
|
||||
|
||||
|
||||
def get_verbosity() -> int:
    """Return the effective logging level of the library root logger."""
    _set_library_root_logger()
    root_logger = _get_library_root_logger()
    return root_logger.getEffectiveLevel()
|
||||
|
||||
|
||||
def set_verbosity(verbosity: int) -> None:
    """Set the level of the library root logger.

    Args:
        verbosity: A ``logging`` level such as ``logging.INFO``.
    """
    _set_library_root_logger()
    root_logger = _get_library_root_logger()
    root_logger.setLevel(verbosity)
|
||||
|
||||
|
||||
def set_verbosity_debug() -> None:
    """Set the library root logger's level to ``logging.DEBUG``."""
    level = logging.DEBUG
    set_verbosity(level)
|
||||
|
||||
|
||||
def set_verbosity_info() -> None:
    """Set the library root logger's level to ``logging.INFO``."""
    level = logging.INFO
    set_verbosity(level)
|
||||
|
||||
|
||||
def set_verbosity_warning() -> None:
    """Set the library root logger's level to ``logging.WARNING``."""
    level = logging.WARNING
    set_verbosity(level)
|
||||
|
||||
|
||||
def set_verbosity_error() -> None:
    """Set the library root logger's level to ``logging.ERROR``."""
    level = logging.ERROR
    set_verbosity(level)
|
||||
|
||||
|
||||
def set_verbosity_fatal() -> None:
    """Set the library root logger's level to ``logging.FATAL``."""
    level = logging.FATAL
    set_verbosity(level)
|
||||
|
||||
|
||||
def set_handler(handler: logging.Handler) -> None:
    """Attach ``handler`` to the library root logger.

    Args:
        handler: The handler to add; must not be ``None``.
    """
    _set_library_root_logger()

    assert handler is not None

    root_logger = _get_library_root_logger()
    root_logger.addHandler(handler)
|
||||
|
||||
|
||||
def set_default_handler() -> None:
    """Attach the library's default handler to the library root logger."""
    # Initialise first: ``_default_handler`` is still ``None`` until the
    # library root logger has been configured, and passing ``None`` on would
    # trip the ``None`` check in ``set_handler``.
    _set_library_root_logger()
    set_handler(_default_handler)
|
||||
|
||||
|
||||
def unset_handler(handler: logging.Handler) -> None:
    """Detach ``handler`` from the library root logger.

    Args:
        handler: The handler to remove; must not be ``None``.
    """
    _set_library_root_logger()

    assert handler is not None

    root_logger = _get_library_root_logger()
    root_logger.removeHandler(handler)
|
||||
|
||||
|
||||
def unset_default_handler() -> None:
    """Detach the library's default handler from the library root logger."""
    # Initialise first: ``_default_handler`` is still ``None`` until the
    # library root logger has been configured, and passing ``None`` on would
    # trip the ``None`` check in ``unset_handler``.
    _set_library_root_logger()
    unset_handler(_default_handler)
|
||||
|
||||
|
||||
def set_propagation() -> None:
    """Let records from the library root logger propagate to ancestor loggers."""
    # Run the one-time setup first: it sets ``propagate = False``, so if it ran
    # lazily *after* this call it would silently undo the caller's choice.
    _set_library_root_logger()
    _get_library_root_logger().propagate = True
|
||||
|
||||
|
||||
def unset_propagation() -> None:
    """Stop records from the library root logger propagating to ancestor loggers."""
    # Initialise first, as the other configuration helpers in this module do,
    # so the logger is fully configured once this returns.
    _set_library_root_logger()
    _get_library_root_logger().propagate = False
|
||||
|
||||
|
||||
def set_formatting() -> None:
    """Set an explicit format on every handler bound to the library root logger.

    Records are rendered as::

        [levelname|filename:line number] time >> message
    """
    # Make sure the default handler exists; otherwise a call made before any
    # logger was requested would find no handlers and silently do nothing.
    _set_library_root_logger()

    formatter = logging.Formatter(
        "[%(levelname)s|%(filename)s:%(lineno)s] %(asctime)s >> %(message)s"
    )

    for handler in _get_library_root_logger().handlers:
        handler.setFormatter(formatter)
|
||||
|
||||
|
||||
def unset_formatting() -> None:
    """Remove the explicit formatter from every handler on the library root logger."""
    # Initialise first so the default handler is present and gets reset too.
    _set_library_root_logger()

    for handler in _get_library_root_logger().handlers:
        handler.setFormatter(None)
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def warning_once(self, *args, **kwargs):
    """Emit a warning through ``self.warning`` only once per distinct call.

    The ``lru_cache`` wrapper keys on the logger and the arguments, so a
    repeated call with the same logger and the same arguments is answered
    from the cache and logs nothing. All arguments must be hashable.
    """
    self.warning(*args, **kwargs)


# Make the helper available as a method on every ``logging.Logger``.
logging.Logger.warning_once = warning_once
|
||||
Loading…
Reference in New Issue
Block a user