added posthog proxy

This commit is contained in:
mohammadehsanansari 2025-12-08 14:23:35 +05:30
parent f038ca1596
commit e230856fbe

View File

@ -1,20 +1,3 @@
"""
This module contains code that relates to sending ScrapeGraphAI usage telemetry.
To disable sending telemetry there are three ways:
1. Set it to false programmatically in your driver:
>>> from scrapegraphai import telemetry
>>> telemetry.disable_telemetry()
2. Set it to `false` in ~/.scrapegraphai.conf under `DEFAULT`
[DEFAULT]
telemetry_enabled = False
3. Set SCRAPEGRAPHAI_TELEMETRY_ENABLED=false as an environment variable:
SCRAPEGRAPHAI_TELEMETRY_ENABLED=false python run.py
or:
export SCRAPEGRAPHAI_TELEMETRY_ENABLED=false
"""
import configparser
import functools
import importlib.metadata
@ -27,17 +10,19 @@ import uuid
from typing import Callable, Dict
from urllib import request
# Installed package version as a dotted string (e.g. "1.2.3"). Telemetry must
# never prevent the library from importing, so fall back to a placeholder when
# the distribution metadata is unavailable (e.g. running from a source tree).
try:
    VERSION = importlib.metadata.version("scrapegraphai")
except importlib.metadata.PackageNotFoundError:
    VERSION = "unknown"
# VERSION is already a string; joining its characters with "." would turn
# "1.2.3" into "1...2...3", so it is used verbatim for the User-Agent header.
STR_VERSION = VERSION

# Direct PostHog capture endpoint and project key
# (https://posthog.com/docs/api/post-only-endpoints). Kept so that any code
# still referencing these names continues to work.
HOST = "https://eu.i.posthog.com"
TRACK_URL = f"{HOST}/capture/"
API_KEY = "phc_orsfU4aHhtpTSLVcUE2hdUkQDLM4OEQZndKGFBKMEtn"

# Proxy service endpoint that receives telemetry events instead of PostHog.
PROXY_URL = "https://scrapegraph-proxy.onrender.com/capture/"

# Timeout, in seconds, passed to urlopen when sending an event.
TIMEOUT = 2

DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.scrapegraphai.conf")

logger = logging.getLogger(__name__)
def _load_config(config_location: str) -> configparser.ConfigParser:
config = configparser.ConfigParser()
try:
@ -59,28 +44,22 @@ def _load_config(config_location: str) -> configparser.ConfigParser:
return config
def _check_config_and_environ_for_telemetry_flag(
    default_value: bool, config_obj: configparser.ConfigParser
) -> bool:
    """
    Resolve whether telemetry is enabled.

    Precedence (lowest to highest): ``default_value``, the ``telemetry_enabled``
    option in the ``DEFAULT`` section of ``config_obj``, then the
    ``SCRAPEGRAPHAI_TELEMETRY_ENABLED`` environment variable.

    Args:
        default_value: value returned when neither source provides a parsable flag.
        config_obj: parsed configuration. When the environment variable is set,
            its value is also written into ``config_obj["DEFAULT"]``.

    Returns:
        The resolved flag. A value that cannot be parsed as a boolean is ignored
        (and reported at debug level) rather than raising.
    """
    telemetry_enabled = default_value

    if "telemetry_enabled" in config_obj["DEFAULT"]:
        try:
            telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
        except (ValueError, configparser.Error) as e:
            logger.debug(
                "Unable to parse value for `telemetry_enabled` from config. "
                "Encountered %s",
                e,
            )

    env_value = os.environ.get("SCRAPEGRAPHAI_TELEMETRY_ENABLED")
    if env_value is not None:
        try:
            # The assignment sits inside the try block because ConfigParser
            # validates interpolation syntax on write and raises ValueError
            # for values such as a stray "%".
            config_obj["DEFAULT"]["telemetry_enabled"] = env_value
            telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled")
        except (ValueError, configparser.Error) as e:
            logger.debug(
                "Unable to parse value for `SCRAPEGRAPHAI_TELEMETRY_ENABLED` "
                "from environment. Encountered %s",
                e,
            )

    return telemetry_enabled
@ -90,87 +69,70 @@ g_anonymous_id = config["DEFAULT"]["anonymous_id"]
# Number of enabled-state checks made so far; incremented by is_telemetry_enabled().
CALL_COUNTER = 0
# Once CALL_COUNTER exceeds this value, is_telemetry_enabled() reports False.
MAX_COUNT_SESSION = 1000

# Properties merged into every event by log_event().
BASE_PROPERTIES = {
    "os_type": os.name,
    "os_version": platform.platform(),
    "python_version": f"{platform.python_version()}/{platform.python_implementation()}",
    "distinct_id": g_anonymous_id,
    "scrapegraphai_version": VERSION,
    # A dict literal keeps only the last value of a repeated key, so the
    # superseded "0.0.3" entry was dead and has been dropped.
    "telemetry_version": "0.0.4-proxy",
}
def disable_telemetry():
    """
    Switch telemetry off by clearing the module-level ``g_telemetry_enabled`` flag.
    """
    global g_telemetry_enabled
    g_telemetry_enabled = False
def is_telemetry_enabled() -> bool:
    """
    Report whether a telemetry event may be sent right now.

    Every call made while telemetry is enabled increments ``CALL_COUNTER``;
    once the counter exceeds ``MAX_COUNT_SESSION`` the function returns False.
    The first enabled call also emits a one-time debug notice explaining how
    to opt out.

    Returns:
        True if telemetry is enabled and the session cap has not been
        exceeded, otherwise False.
    """
    global CALL_COUNTER

    if not g_telemetry_enabled:
        return False

    if CALL_COUNTER == 0:
        logger.debug(
            "Note: ScrapeGraphAI collects anonymous usage data to improve the library. "
            "You can disable telemetry by setting SCRAPEGRAPHAI_TELEMETRY_ENABLED=false or "
            "by editing ~/.scrapegraphai.conf."
        )
    CALL_COUNTER += 1
    return CALL_COUNTER <= MAX_COUNT_SESSION
# Events are posted to the proxy endpoint without an API key.
# NOTE(review): presumably the proxy attaches the PostHog key itself - confirm.
def _send_event_json(event_json: dict):
    """
    POST a single telemetry event to ``PROXY_URL`` as JSON.

    This is best-effort: any failure (serialisation, network, non-200 reply)
    is logged at debug level and never propagated to the caller.

    Args:
        event_json: JSON-serialisable event payload.
    """
    headers = {
        "Content-Type": "application/json",
        "User-Agent": f"scrapegraphai/{STR_VERSION}",
    }
    try:
        data = json.dumps(event_json).encode()
        req = request.Request(PROXY_URL, data=data, headers=headers)
        with request.urlopen(req, timeout=TIMEOUT) as f:
            # Read the body exactly once; a second read() would return b"".
            response_body = f.read()
            if f.status != 200:
                raise RuntimeError(response_body)
    except Exception as e:
        # Deliberately broad: telemetry must never break the host program.
        logger.debug("Failed to send telemetry data to proxy: %s", e)
    else:
        logger.debug("Telemetry payload forwarded to proxy: %s", data)
def send_event_json(event_json: dict):
    """
    Send a telemetry event on a separate thread so the caller is not blocked.

    Args:
        event_json: JSON-serialisable event payload, handed to
            ``_send_event_json`` on the new thread.

    Raises:
        RuntimeError: if telemetry is disabled.
    """
    if not g_telemetry_enabled:
        raise RuntimeError("Telemetry tracking is disabled!")
    try:
        th = threading.Thread(target=_send_event_json, args=(event_json,))
        th.start()
    except Exception as e:
        # Failing to start the thread is logged once and otherwise ignored.
        logger.debug("Telemetry dispatch thread failed: %s", e)
def log_event(event: str, properties: Dict[str, object]):
    """
    Build and dispatch a telemetry event if telemetry is currently enabled.

    Args:
        event: event name.
        properties: event-specific properties; they are merged over
            ``BASE_PROPERTIES``, so a key present in both takes the value
            given here. ``None`` is treated as no extra properties.
    """
    if not is_telemetry_enabled():
        return
    payload = {
        "event": event,
        "distinct_id": g_anonymous_id,
        "properties": {**BASE_PROPERTIES, **(properties or {})},
    }
    send_event_json(payload)
def log_graph_execution(
@ -188,10 +150,7 @@ def log_graph_execution(
exception: str = None,
total_tokens: int = None,
):
"""
function for logging the graph execution
"""
properties = {
props = {
"graph_name": graph_name,
"source": source,
"prompt": prompt,
@ -207,26 +166,15 @@ def log_graph_execution(
"total_tokens": total_tokens,
"type": "community-library",
}
log_event("graph_execution", properties)
log_event("graph_execution", props)
def capture_function_usage(call_fn: Callable) -> Callable:
    """
    Decorator that records a ``function_usage`` telemetry event after each
    call of ``call_fn``, whether the call returns or raises.

    Args:
        call_fn: the function to wrap.

    Returns:
        A wrapper with the same signature and metadata as ``call_fn`` that
        returns (or raises) exactly what ``call_fn`` does.
    """

    @functools.wraps(call_fn)
    def wrapped_fn(*args, **kwargs):
        try:
            return call_fn(*args, **kwargs)
        finally:
            # log_event() performs the is_telemetry_enabled() check itself;
            # checking here as well would count every call twice against
            # the per-session cap.
            try:
                log_event("function_usage", {"function_name": call_fn.__name__})
            except Exception as e:
                # An exception escaping this finally block would replace the
                # wrapped function's own result or exception, so it is
                # logged and swallowed.
                logger.debug(
                    "Failed to send telemetry for function usage. Encountered: %s", e
                )

    return wrapped_fn