diff --git a/puppet/zulip/files/cron.d/rabbitmq-numconsumers b/puppet/zulip/files/cron.d/rabbitmq-numconsumers index 4547277323..0bcf35d00b 100644 --- a/puppet/zulip/files/cron.d/rabbitmq-numconsumers +++ b/puppet/zulip/files/cron.d/rabbitmq-numconsumers @@ -10,4 +10,5 @@ USER=root * * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file signups * * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file message_sender * * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file feedback_messages +* * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file error_reports * * * * * root /home/zulip/deployments/current/bin/write-rabbitmq-consumers-state-file digest_emails diff --git a/puppet/zulip/files/supervisor/conf.d/zulip.conf b/puppet/zulip/files/supervisor/conf.d/zulip.conf index 2727312790..1418e622bb 100644 --- a/puppet/zulip/files/supervisor/conf.d/zulip.conf +++ b/puppet/zulip/files/supervisor/conf.d/zulip.conf @@ -164,6 +164,18 @@ redirect_stderr=true ; redirect proc stderr to stdout (default false) stdout_logfile=/var/log/zulip/events-feedback_messages.log ; stdout log path, NONE for none; default AUTO directory=/home/zulip/deployments/current/ +[program:zulip-events-error_reports] +command=python /home/zulip/deployments/current/manage.py process_queue error_reports +priority=600 ; the relative start priority (default 999) +autostart=true ; start at supervisord start (default: true) +autorestart=true ; whether/when to restart (default: unexpected) +stopsignal=TERM ; signal used to kill process (default TERM) +stopwaitsecs=30 ; max num secs to wait b4 SIGKILL (default 10) +user=zulip ; setuid to this UNIX account to run the program +redirect_stderr=true ; redirect proc stderr to stdout (default false) +stdout_logfile=/var/log/zulip/events-error_reports.log ; stdout log path, NONE for none; default AUTO +directory=/home/zulip/deployments/current/ + [program:zulip-events-digest_emails] command=python /home/zulip/deployments/current/manage.py process_queue digest_emails priority=600 ; the relative start priority (default 999) @@ -181,7 +193,7 @@ directory=/home/zulip/deployments/current/ ; process groups. [group:zulip-workers] -programs=zulip-events-user-activity,zulip-events-user-activity-interval,zulip-events-user-presence,zulip-events-signups,zulip-events-confirmation-emails,zulip-events-missedmessage_reminders,zulip-events-slowqueries,zulip-events-message_sender,zulip-events-feedback_messages,zulip-events-digest_emails ; each refers to 'x' in [program:x] definitions +programs=zulip-events-user-activity,zulip-events-user-activity-interval,zulip-events-user-presence,zulip-events-signups,zulip-events-confirmation-emails,zulip-events-missedmessage_reminders,zulip-events-slowqueries,zulip-events-message_sender,zulip-events-feedback_messages,zulip-events-digest_emails,zulip-events-error_reports ; each refers to 'x' in [program:x] definitions ; The [include] section can just contain the "files" setting. This ; setting can list multiple files (separated by whitespace or diff --git a/zerver/handlers.py b/zerver/handlers.py index 7ee889e0cb..a404de2d9d 100644 --- a/zerver/handlers.py +++ b/zerver/handlers.py @@ -10,6 +10,8 @@ from django.core import mail from django.utils.log import AdminEmailHandler from django.views.debug import ExceptionReporter, get_exception_reporter_filter +from zerver.lib.queue import queue_json_publish + def format_record(record): """ Given a Django error LogRecord, format and return the interesting details, @@ -32,10 +34,8 @@ def format_record(record): return (subject, stack_trace, user_info) class AdminZulipHandler(logging.Handler): - """An exception log handler that Zulips log entries to the Zulip realm. - - If the request is passed as the first argument to the log record, - request data will be provided in the email report. + """An exception log handler that sends the exception to the queue to be + sent to the Zulip feedback server. """ # adapted in part from django/utils/log.py @@ -44,49 +44,63 @@ class AdminZulipHandler(logging.Handler): logging.Handler.__init__(self) def emit(self, record): - if not settings.ERROR_BOT: - return - - # We have to defer imports to avoid circular imports in settings.py. - from zerver.lib.actions import internal_send_message - - try: request = record.request filter = get_exception_reporter_filter(request) - request_repr = "Request info:\n~~~~\n" - request_repr += "- path: %s\n" % (request.path,) - if request.method == "GET": - request_repr += "- GET: %s\n" % (request.GET,) - elif request.method == "POST": - request_repr += "- POST: %s\n" % (filter.get_post_parameters(request),) - for field in ["REMOTE_ADDR", "QUERY_STRING", "SERVER_NAME"]: - request_repr += "- %s: \"%s\"\n" % (field, request.META.get(field, "(None)")) - request_repr += "~~~~" - except Exception: - request_repr = "Log record message:\n%s" % (record.getMessage(),) - subject, stack_trace, user_info = format_record(record) + if record.exc_info: + stack_trace = ''.join(traceback.format_exception(*record.exc_info)) + else: + stack_trace = None + + try: + user_profile = request.user + user_full_name = user_profile.full_name + user_email = user_profile.email + except Exception: + traceback.print_exc() + # Error was triggered by an anonymous user. + user_full_name = None + user_email = None + + report = dict( + node = platform.node(), + method = request.method, + path = request.path, + data = request.GET if request.method == 'GET' + else filter.get_post_parameters(request), + remote_addr = request.META.get('REMOTE_ADDR', None), + query_string = request.META.get('QUERY_STRING', None), + server_name = request.META.get('SERVER_NAME', None), + message = record.getMessage(), + stack_trace = stack_trace, + user_full_name = user_full_name, + user_email = user_email, + ) + except: + traceback.print_exc() + report = dict( + node = platform.node(), + message = record.getMessage(), + ) try: - internal_send_message(settings.ERROR_BOT, - "stream", "errors", self.format_subject(subject), - "Error generated by %s\n\n~~~~ pytb\n%s\n\n~~~~\n%s" % ( - user_info, stack_trace, request_repr)) + if not settings.STAGING_DEPLOYED: + queue_json_publish('error_reports', dict( + type = "server", + report = report, + ), lambda x: None) + else: + # On staging, process the report directly so it can happen inside this + # try/except to prevent looping + from zilencer.error_notify import notify_server_error + notify_server_error(report) except: # If this breaks, complain loudly but don't pass the traceback up the stream # However, we *don't* want to use logging.exception since that could trigger a loop. logging.warning("Reporting an exception triggered an exception!", exc_info=True) - def format_subject(self, subject): - """ - Escape CR and LF characters, and limit length to MAX_SUBJECT_LENGTH. - """ - from zerver.models import MAX_SUBJECT_LENGTH - formatted_subject = subject.replace('\n', '\\n').replace('\r', '\\r') - return formatted_subject[:MAX_SUBJECT_LENGTH] - class ZulipAdminEmailHandler(AdminEmailHandler): """An exception log handler that emails log entries to site admins. diff --git a/zerver/views/__init__.py b/zerver/views/__init__.py index e6498d5ab3..01b942a033 100644 --- a/zerver/views/__init__.py +++ b/zerver/views/__init__.py @@ -1930,34 +1930,32 @@ def json_report_error(request, user_profile, message=REQ, stacktrace=REQ, ui_message=REQ(converter=json_to_bool), user_agent=REQ, href=REQ, log=REQ, more_info=REQ(converter=json_to_dict, default=None)): - subject = "error for %s" % (user_profile.email,) - if ui_message: - subject = "User-visible browser " + subject - else: - subject = "Browser " + subject if js_source_map: stacktrace = js_source_map.annotate_stacktrace(stacktrace) - body = ("Message:\n%s\n\nStacktrace:\n%s\n\nUser agent: %s\nhref: %s\n" - "User saw error in UI: %s\n" - % (message, stacktrace, user_agent, href, ui_message)) - - body += "Server path: %s\n" % (settings.DEPLOY_ROOT,) try: - body += "Deployed version: %s" % ( - subprocess.check_output(["git", "log", "HEAD^..HEAD", "--oneline"]),) + version = subprocess.check_output(["git", "log", "HEAD^..HEAD", "--oneline"]) except Exception: - body += "Could not determine current git commit ID.\n" + version = None - if more_info is not None: - body += "\nAdditional information:" - for (key, value) in more_info.iteritems(): - body += "\n %s: %s" % (key, value) + queue_json_publish('error_reports', dict( + type = "browser", + report = dict( + user_email = user_profile.email, + user_full_name = user_profile.full_name, + user_visible = ui_message, + server_path = settings.DEPLOY_ROOT, + version = version, + user_agent = user_agent, + href = href, + message = message, + stacktrace = stacktrace, + log = log, + more_info = more_info, + ) + ), lambda x: None) - body += "\n\nLog:\n%s" % (log,) - - mail_admins(subject, body) return json_success() @authenticated_json_post_view diff --git a/zerver/worker/queue_processors.py b/zerver/worker/queue_processors.py index 38d83b3473..8baeb294d4 100644 --- a/zerver/worker/queue_processors.py +++ b/zerver/worker/queue_processors.py @@ -188,16 +188,19 @@ class MissedMessageWorker(QueueProcessingWorker): # of messages time.sleep(2 * 60) +def make_feedback_client(): + sys.path.append(os.path.join(os.path.dirname(__file__), '../../api')) + import zulip + return zulip.Client( + email=settings.DEPLOYMENT_ROLE_NAME, + api_key=settings.DEPLOYMENT_ROLE_KEY, + verbose=True, + site=settings.FEEDBACK_TARGET) + @assign_queue('feedback_messages') class FeedbackBot(QueueProcessingWorker): def start(self): - sys.path.append(os.path.join(os.path.dirname(__file__), '../../api')) - import zulip - self.staging_client = zulip.Client( - email=settings.DEPLOYMENT_ROLE_NAME, - api_key=settings.DEPLOYMENT_ROLE_KEY, - verbose=True, - site=settings.FEEDBACK_TARGET) + self.staging_client = make_feedback_client() self.staging_client._register( 'forward_feedback', method='POST', @@ -209,6 +212,21 @@ class FeedbackBot(QueueProcessingWorker): def consume(self, event): self.staging_client.forward_feedback(event) +@assign_queue('error_reports') +class ErrorReporter(QueueProcessingWorker): + def start(self): + self.staging_client = make_feedback_client() + self.staging_client._register( + 'forward_error', + method='POST', + url='deployments/report_error', + make_request=(lambda type, report: {'type': type, 'report': simplejson.dumps(report)}), + ) + QueueProcessingWorker.start(self) + + def consume(self, event): + self.staging_client.forward_error(event['type'], event['report']) + @assign_queue('slow_queries') class SlowQueryWorker(QueueProcessingWorker): def start(self): diff --git a/zilencer/error_notify.py b/zilencer/error_notify.py new file mode 100644 index 0000000000..c03b06af75 --- /dev/null +++ b/zilencer/error_notify.py @@ -0,0 +1,125 @@ +from collections import defaultdict +import logging + +from django.conf import settings +from django.core.mail import mail_admins + +from zerver.lib.actions import internal_send_message + +def format_subject(subject): + """ + Escape CR and LF characters, and limit length to MAX_SUBJECT_LENGTH. + """ + from zerver.models import MAX_SUBJECT_LENGTH + subject = subject.replace('\n', '\\n').replace('\r', '\\r') + + if len(subject) > MAX_SUBJECT_LENGTH: + subject = subject[:MAX_SUBJECT_LENGTH-3].rstrip() + "..." + + return subject + +def user_info_str(report): + if report['user_full_name'] and report['user_email']: + user_info = "%(user_full_name)s (%(user_email)s)" % (report) + else: + user_info = "Anonymous user (not logged in)" + + user_info += " on %s deployment" % (report['deployment'],) + return user_info + +def notify_browser_error(report): + report = defaultdict(lambda: None, report) + if settings.ERROR_BOT: + zulip_browser_error(report) + email_browser_error(report) + +def email_browser_error(report): + subject = "Browser error for %s" % (user_info_str(report)) + + body = ("User: %(user_full_name)s <%(user_email)s> on %(deployment)s\n\n" + "Message:\n%(message)s\n\nStacktrace:\n%(stacktrace)s\n\n" + "User agent: %(user_agent)s\n" + "href: %(href)s\n" + "Server path: %(server_path)s\n" + "Deployed version: %(version)s\n" + % report) + + more_info = report['more_info'] + if more_info is not None: + body += "\nAdditional information:" + for (key, value) in more_info.iteritems(): + body += "\n %s: %s" % (key, value) + + body += "\n\nLog:\n%s" % (report['log'],) + + mail_admins(subject, body) + +def zulip_browser_error(report): + subject = "JS error: %s" % (report['user_email'],) + + user_info = user_info_str(report) + + body = "User: %s\n" % (user_info,) + body += ( + "Message: %(message)s\n" + "~~~\n%(stacktrace)s\n~~~\n" + "User agent: `%(user_agent)s`\n" + "href: `%(href)s`\n" + "Server path: `%(server_path)s`\n" + "Deployed version: `%(version)s`\n" + % report ) + + more_info = report['more_info'] + if more_info is not None: + body += "\nAdditional information:\n~~~" + for (key, value) in more_info.iteritems(): + body += "\n %s: %s" % (key, value) + + body += "Log:\n~~~\n%s\n~~~" % (report['log'],) + + internal_send_message(settings.ERROR_BOT, + "stream", "errors", format_subject(subject), body) + +def notify_server_error(report): + report = defaultdict(lambda: None, report) + email_server_error(report) + if settings.ERROR_BOT: + zulip_server_error(report) + +def zulip_server_error(report): + subject = '%(node)s: %(message)s' % report + stack_trace = report['stack_trace'] or "No stack trace available" + + user_info = user_info_str(report) + + request_repr = ( + "Request info:\n~~~~\n" + "- path: %(path)s\n" + "- %(method)s: %(data)s\n") % (report) + + for field in ["REMOTE_ADDR", "QUERY_STRING", "SERVER_NAME"]: + request_repr += "- %s: \"%s\"\n" % (field, report.get(field.lower())) + request_repr += "~~~~" + + internal_send_message(settings.ERROR_BOT, + "stream", "errors", format_subject(subject), + "Error generated by %s\n\n~~~~ pytb\n%s\n\n~~~~\n%s" % ( + user_info, stack_trace, request_repr)) + +def email_server_error(report): + subject = '%(node)s: %(message)s' % (report) + + user_info = user_info_str(report) + + request_repr = ( + "Request info:\n" + "- path: %(path)s\n" + "- %(method)s: %(data)s\n") % (report) + + for field in ["REMOTE_ADDR", "QUERY_STRING", "SERVER_NAME"]: + request_repr += "- %s: \"%s\"\n" % (field, report.get(field.lower())) + + message = "Error generated by %s\n\n%s\n\n%s" % (user_info, report['stack_trace'], + request_repr) + + mail_admins(format_subject(subject), message, fail_silently=True) diff --git a/zilencer/urls/api.py b/zilencer/urls/api.py index 4f7e594317..b550d4f479 100644 --- a/zilencer/urls/api.py +++ b/zilencer/urls/api.py @@ -3,5 +3,7 @@ from django.conf.urls import patterns, url, include urlpatterns = patterns('zilencer.views', url('^feedback$', 'rest_dispatch', {'POST': 'submit_feedback'}), + url('^report_error$', 'rest_dispatch', + {'POST': 'report_error'}), url('^endpoints$', 'lookup_endpoints_for_user'), ) diff --git a/zilencer/views.py b/zilencer/views.py index 27ee32a602..fde792c17a 100644 --- a/zilencer/views.py +++ b/zilencer/views.py @@ -9,7 +9,7 @@ from zerver.lib.response import json_success, json_error, json_response, json_me from zerver.lib.rest import rest_dispatch as _rest_dispatch from zerver.models import get_realm, get_user_profile_by_email, email_to_domain, \ UserProfile - +from error_notify import notify_server_error, notify_browser_error rest_dispatch = csrf_exempt((lambda request, *args, **kwargs: _rest_dispatch(request, globals(), *args, **kwargs))) @@ -51,6 +51,16 @@ def submit_feedback(request, deployment, message=REQ(converter=json_to_dict)): return HttpResponse(message['sender_email']) +@has_request_variables +def report_error(request, deployment, type=REQ, report=REQ(converter=json_to_dict)): + report['deployment'] = deployment.name + if type == 'browser': + notify_browser_error(report) + elif type == 'server': + notify_server_error(report) + else: + return json_error("Invalid type parameter") + return json_response({}) def realm_for_email(email): try: