mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-12 21:01:54 +08:00
added posthog proxy
This commit is contained in:
parent
f038ca1596
commit
e230856fbe
@ -1,20 +1,3 @@
|
||||
"""
|
||||
This module contains code that relates to sending ScrapeGraphAI usage telemetry.
|
||||
|
||||
To disable sending telemetry there are three ways:
|
||||
|
||||
1. Set it to false programmatically in your driver:
|
||||
>>> from scrapegraphai import telemetry
|
||||
>>> telemetry.disable_telemetry()
|
||||
2. Set it to `false` in ~/.scrapegraphai.conf under `DEFAULT`
|
||||
[DEFAULT]
|
||||
telemetry_enabled = False
|
||||
3. Set SCRAPEGRAPHAI_TELEMETRY_ENABLED=false as an environment variable:
|
||||
SCRAPEGRAPHAI_TELEMETRY_ENABLED=false python run.py
|
||||
or:
|
||||
export SCRAPEGRAPHAI_TELEMETRY_ENABLED=false
|
||||
"""
|
||||
|
||||
import configparser
|
||||
import functools
|
||||
import importlib.metadata
|
||||
@ -27,17 +10,19 @@ import uuid
|
||||
from typing import Callable, Dict
|
||||
from urllib import request
|
||||
|
||||
# Load version
|
||||
VERSION = importlib.metadata.version("scrapegraphai")
|
||||
STR_VERSION = ".".join([str(i) for i in VERSION])
|
||||
HOST = "https://eu.i.posthog.com"
|
||||
TRACK_URL = f"{HOST}/capture/" # https://posthog.com/docs/api/post-only-endpoints
|
||||
API_KEY = "phc_orsfU4aHhtpTSLVcUE2hdUkQDLM4OEQZndKGFBKMEtn"
|
||||
|
||||
# 🚀 Your proxy service endpoint (instead of PostHog)
|
||||
PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/"
|
||||
|
||||
TIMEOUT = 2
|
||||
DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Everything below remains mostly the same
|
||||
def _load_config(config_location: str) -> configparser.ConfigParser:
|
||||
config = configparser.ConfigParser()
|
||||
try:
|
||||
@ -59,28 +44,22 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
|
||||
return config
|
||||
|
||||
|
||||
def _check_config_and_environ_for_telemetry_flag(
|
||||
telemetry_default: bool, config_obj: configparser.ConfigParser
|
||||
) -> bool:
|
||||
telemetry_enabled = telemetry_default
|
||||
def _check_config_and_environ_for_telemetry_flag(default_value: bool, config_obj):
|
||||
telemetry_enabled = default_value
|
||||
if "telemetry_enabled" in config_obj["DEFAULT"]:
|
||||
try:
|
||||
telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
|
||||
except ValueError as e:
|
||||
logger.debug(
|
||||
f"""Unable to parse value for
|
||||
`telemetry_enabled` from config. Encountered {e}"""
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED") is not None:
|
||||
env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED")
|
||||
config_obj["DEFAULT"]["telemetry_enabled"] = env_value
|
||||
try:
|
||||
telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
|
||||
except ValueError as e:
|
||||
logger.debug(
|
||||
f"""Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED`
|
||||
from environment. Encountered {e}"""
|
||||
telemetry_enabled = config_obj.getboolean(
|
||||
"DEFAULT", "telemetry_enabled"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return telemetry_enabled
|
||||
|
||||
|
||||
@ -90,87 +69,70 @@ g_anonymous_id = config["DEFAULT"]["anonymous_id"]
|
||||
CALL_COUNTER = 0
|
||||
MAX_COUNT_SESSION = 1000
|
||||
|
||||
|
||||
BASE_PROPERTIES = {
|
||||
"os_type": os.name,
|
||||
"os_version": platform.platform(),
|
||||
"python_version": f"{platform.python_version()}/{platform.python_implementation()}",
|
||||
"distinct_id": g_anonymous_id,
|
||||
"scrapegraphai_version": VERSION,
|
||||
"telemetry_version": "0.0.3",
|
||||
"telemetry_version": "0.0.4-proxy",
|
||||
}
|
||||
|
||||
|
||||
def disable_telemetry():
|
||||
"""
|
||||
function for disabling telemetry
|
||||
"""
|
||||
global g_telemetry_enabled
|
||||
g_telemetry_enabled = False
|
||||
|
||||
|
||||
def is_telemetry_enabled() -> bool:
|
||||
"""
|
||||
function for checking if telemetry is enabled
|
||||
"""
|
||||
if g_telemetry_enabled:
|
||||
global CALL_COUNTER
|
||||
if CALL_COUNTER == 0:
|
||||
logger.debug(
|
||||
"Note: ScrapeGraphAI collects anonymous usage data to improve the library. "
|
||||
"You can disable telemetry by setting SCRAPEGRAPHAI_TELEMETRY_ENABLED=false or "
|
||||
"by editing ~/.scrapegraphai.conf."
|
||||
)
|
||||
CALL_COUNTER += 1
|
||||
if CALL_COUNTER > MAX_COUNT_SESSION:
|
||||
return False
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
return False
|
||||
|
||||
|
||||
# ⭐ UPDATED FOR PROXY — send without API key
|
||||
def _send_event_json(event_json: dict):
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {API_KEY}",
|
||||
"User-Agent": f"scrapegraphai/{STR_VERSION}",
|
||||
}
|
||||
try:
|
||||
data = json.dumps(event_json).encode()
|
||||
req = request.Request(TRACK_URL, data=data, headers=headers)
|
||||
req = request.Request(PROXY_URL, data=data, headers=headers)
|
||||
|
||||
with request.urlopen(req, timeout=TIMEOUT) as f:
|
||||
res = f.read()
|
||||
response_body = f.read()
|
||||
if f.code != 200:
|
||||
raise RuntimeError(res)
|
||||
raise RuntimeError(response_body)
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to send telemetry data: {e}")
|
||||
logger.debug(f"Failed to send telemetry data to proxy: {e}")
|
||||
else:
|
||||
logger.debug(f"Telemetry data sent: {data}")
|
||||
logger.debug(f"Telemetry payload forwarded to proxy: {data}")
|
||||
|
||||
|
||||
def send_event_json(event_json: dict):
|
||||
"""
|
||||
function for sending event json
|
||||
"""
|
||||
if not g_telemetry_enabled:
|
||||
raise RuntimeError("Telemetry tracking is disabled!")
|
||||
try:
|
||||
th = threading.Thread(target=_send_event_json, args=(event_json,))
|
||||
th.start()
|
||||
except Exception as e:
|
||||
logger.debug(f"Failed to send telemetry data in a thread: {e}")
|
||||
logger.debug(f"Telemetry dispatch thread failed: {e}")
|
||||
|
||||
|
||||
def log_event(event: str, properties: Dict[str, any]):
|
||||
"""
|
||||
function for logging the events
|
||||
"""
|
||||
if is_telemetry_enabled():
|
||||
event_json = {
|
||||
"api_key": API_KEY,
|
||||
payload = {
|
||||
"event": event,
|
||||
"distinct_id": g_anonymous_id,
|
||||
"properties": {**BASE_PROPERTIES, **properties},
|
||||
}
|
||||
send_event_json(event_json)
|
||||
send_event_json(payload)
|
||||
|
||||
|
||||
def log_graph_execution(
|
||||
@ -188,10 +150,7 @@ def log_graph_execution(
|
||||
exception: str = None,
|
||||
total_tokens: int = None,
|
||||
):
|
||||
"""
|
||||
function for logging the graph execution
|
||||
"""
|
||||
properties = {
|
||||
props = {
|
||||
"graph_name": graph_name,
|
||||
"source": source,
|
||||
"prompt": prompt,
|
||||
@ -207,26 +166,15 @@ def log_graph_execution(
|
||||
"total_tokens": total_tokens,
|
||||
"type": "community-library",
|
||||
}
|
||||
log_event("graph_execution", properties)
|
||||
log_event("graph_execution", props)
|
||||
|
||||
|
||||
def capture_function_usage(call_fn: Callable) -> Callable:
|
||||
"""
|
||||
function that captures the usage
|
||||
"""
|
||||
|
||||
@functools.wraps(call_fn)
|
||||
def wrapped_fn(*args, **kwargs):
|
||||
try:
|
||||
return call_fn(*args, **kwargs)
|
||||
finally:
|
||||
if is_telemetry_enabled():
|
||||
try:
|
||||
function_name = call_fn.__name__
|
||||
log_event("function_usage", {"function_name": function_name})
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
f"Failed to send telemetry for function usage. Encountered: {e}"
|
||||
)
|
||||
|
||||
return wrapped_fn
|
||||
log_event("function_usage", {"function_name": call_fn.__name__})
|
||||
return wrapped_fn
|
||||
Loading…
Reference in New Issue
Block a user