From 583a6bbadd267cdc57de06ea78fa9f5e6f2ff6d1 Mon Sep 17 00:00:00 2001 From: Steve Howell Date: Tue, 4 Oct 2016 06:52:26 -0700 Subject: [PATCH] Extract zerver/lib/message.py. This pulls message-related code from models.py into a new module called message.py, and it starts to break some bugdown dependencies. All the methods here are basically related to serializing Message objects as dictionaries for caches and events. extract_message_dict stringify_message_dict message_to_dict message_to_dict_json MessageDict.to_dict_uncached MessageDict.to_dict_uncached_helper MessageDict.build_dict_from_raw_db_row MessageDict.build_message_dict This fix also removes a circular dependency related to get_avatar_url. Also, there was kind of a latent bug in Message.need_to_render_content where it was depending on other calls to Message to import bugdown and set it globally in the namespace. We really need to just eliminate the function, since it's so small and used by code that may be doing very sketchy things, but for now I just fix it. (The bug would possibly be exposed by moving build_message_dict out to the library.) --- zerver/lib/actions.py | 13 ++- zerver/lib/message.py | 213 ++++++++++++++++++++++++++++++++++ zerver/models.py | 211 ++------------------------------- zerver/tests/test_messages.py | 13 ++- zerver/tests/test_narrow.py | 5 +- zerver/views/messages.py | 10 +- 6 files changed, 250 insertions(+), 215 deletions(-) create mode 100644 zerver/lib/message.py diff --git a/zerver/lib/actions.py b/zerver/lib/actions.py index e381559a88..637289416b 100644 --- a/zerver/lib/actions.py +++ b/zerver/lib/actions.py @@ -11,11 +11,14 @@ from django.core import validators from django.contrib.sessions.models import Session from zerver.lib.bugdown import BugdownRenderingException from zerver.lib.cache import ( - flush_user_profile, to_dict_cache_key, to_dict_cache_key_id, ) from zerver.lib.context_managers import lockfile +from zerver.lib.message import ( + MessageDict, + message_to_dict, +) from zerver.models import Realm, RealmEmoji, Stream, UserProfile, UserActivity, \ Subscription, Recipient, Message, Attachment, UserMessage, valid_stream_name, \ Client, DefaultStream, UserPresence, Referral, PushDeviceToken, MAX_SUBJECT_LENGTH, \ @@ -768,8 +771,8 @@ def do_send_messages(messages): event = dict( type = 'message', message = message['message'].id, - message_dict_markdown = message['message'].to_dict(apply_markdown=True), - message_dict_no_markdown = message['message'].to_dict(apply_markdown=False), + message_dict_markdown = message_to_dict(message['message'], apply_markdown=True), + message_dict_no_markdown = message_to_dict(message['message'], apply_markdown=False), presences = presences) users = [{'id': user.id, 'flags': user_flags.get(user.id, []), @@ -2562,9 +2565,9 @@ def do_update_message(user_profile, message, subject, propagate_mode, content, r for changed_message in changed_messages: event['message_ids'].append(changed_message.id) items_for_remote_cache[to_dict_cache_key(changed_message, True)] = \ - (changed_message.to_dict_uncached(apply_markdown=True),) + (MessageDict.to_dict_uncached(changed_message, apply_markdown=True),) items_for_remote_cache[to_dict_cache_key(changed_message, False)] = \ - (changed_message.to_dict_uncached(apply_markdown=False),) + (MessageDict.to_dict_uncached(changed_message, apply_markdown=False),) cache_set_many(items_for_remote_cache) def user_info(um): diff --git a/zerver/lib/message.py b/zerver/lib/message.py new file mode 100644 index 0000000000..44d266996a --- /dev/null +++ b/zerver/lib/message.py @@ -0,0 +1,213 @@ +from __future__ import absolute_import + +import datetime +import ujson +import zlib + +from six import binary_type, text_type + +from zerver.lib.avatar import get_avatar_url +from zerver.lib.avatar_hash import gravatar_hash +import zerver.lib.bugdown as bugdown +from zerver.lib.cache import cache_with_key, to_dict_cache_key +from zerver.lib.str_utils import force_bytes, dict_with_str_keys +from zerver.lib.timestamp import datetime_to_timestamp + +from zerver.models import ( + get_display_recipient_by_id, + Message, + Recipient, +) + +from typing import Any, Dict, Optional + +def extract_message_dict(message_bytes): + # type: (binary_type) -> Dict[str, Any] + return dict_with_str_keys(ujson.loads(zlib.decompress(message_bytes).decode("utf-8"))) + +def stringify_message_dict(message_dict): + # type: (Dict[str, Any]) -> binary_type + return zlib.compress(force_bytes(ujson.dumps(message_dict))) + +def message_to_dict(message, apply_markdown): + # type: (Message, bool) -> Dict[str, Any] + json = message_to_dict_json(message, apply_markdown) + return extract_message_dict(json) + +@cache_with_key(to_dict_cache_key, timeout=3600*24) +def message_to_dict_json(message, apply_markdown): + # type: (Message, bool) -> binary_type + return MessageDict.to_dict_uncached(message, apply_markdown) + +class MessageDict(object): + @staticmethod + def to_dict_uncached(message, apply_markdown): + # type: (Message, bool) -> binary_type + dct = MessageDict.to_dict_uncached_helper(message, apply_markdown) + return stringify_message_dict(dct) + + @staticmethod + def to_dict_uncached_helper(message, apply_markdown): + # type: (Message, bool) -> Dict[str, Any] + return MessageDict.build_message_dict( + apply_markdown = apply_markdown, + message = message, + message_id = message.id, + last_edit_time = message.last_edit_time, + edit_history = message.edit_history, + content = message.content, + subject = message.subject, + pub_date = message.pub_date, + rendered_content = message.rendered_content, + rendered_content_version = message.rendered_content_version, + sender_id = message.sender.id, + sender_email = message.sender.email, + sender_realm_domain = message.sender.realm.domain, + sender_full_name = message.sender.full_name, + sender_short_name = message.sender.short_name, + sender_avatar_source = message.sender.avatar_source, + sender_is_mirror_dummy = message.sender.is_mirror_dummy, + sending_client_name = message.sending_client.name, + recipient_id = message.recipient.id, + recipient_type = message.recipient.type, + recipient_type_id = message.recipient.type_id, + ) + + @staticmethod + def build_dict_from_raw_db_row(row, apply_markdown): + # type: (Dict[str, Any], bool) -> Dict[str, Any] + ''' + row is a row from a .values() call, and it needs to have + all the relevant fields populated + ''' + return MessageDict.build_message_dict( + apply_markdown = apply_markdown, + message = None, + message_id = row['id'], + last_edit_time = row['last_edit_time'], + edit_history = row['edit_history'], + content = row['content'], + subject = row['subject'], + pub_date = row['pub_date'], + rendered_content = row['rendered_content'], + rendered_content_version = row['rendered_content_version'], + sender_id = row['sender_id'], + sender_email = row['sender__email'], + sender_realm_domain = row['sender__realm__domain'], + sender_full_name = row['sender__full_name'], + sender_short_name = row['sender__short_name'], + sender_avatar_source = row['sender__avatar_source'], + sender_is_mirror_dummy = row['sender__is_mirror_dummy'], + sending_client_name = row['sending_client__name'], + recipient_id = row['recipient_id'], + recipient_type = row['recipient__type'], + recipient_type_id = row['recipient__type_id'], + ) + + @staticmethod + def build_message_dict( + apply_markdown, + message, + message_id, + last_edit_time, + edit_history, + content, + subject, + pub_date, + rendered_content, + rendered_content_version, + sender_id, + sender_email, + sender_realm_domain, + sender_full_name, + sender_short_name, + sender_avatar_source, + sender_is_mirror_dummy, + sending_client_name, + recipient_id, + recipient_type, + recipient_type_id, + ): + # type: (bool, Message, int, datetime.datetime, text_type, text_type, text_type, datetime.datetime, text_type, Optional[int], int, text_type, text_type, text_type, text_type, text_type, bool, text_type, int, int, int) -> Dict[str, Any] + + avatar_url = get_avatar_url(sender_avatar_source, sender_email) + + display_recipient = get_display_recipient_by_id( + recipient_id, + recipient_type, + recipient_type_id + ) + + if recipient_type == Recipient.STREAM: + display_type = "stream" + elif recipient_type in (Recipient.HUDDLE, Recipient.PERSONAL): + assert not isinstance(display_recipient, text_type) + display_type = "private" + if len(display_recipient) == 1: + # add the sender in if this isn't a message between + # someone and his self, preserving ordering + recip = {'email': sender_email, + 'domain': sender_realm_domain, + 'full_name': sender_full_name, + 'short_name': sender_short_name, + 'id': sender_id, + 'is_mirror_dummy': sender_is_mirror_dummy} + if recip['email'] < display_recipient[0]['email']: + display_recipient = [recip, display_recipient[0]] + elif recip['email'] > display_recipient[0]['email']: + display_recipient = [display_recipient[0], recip] + + obj = dict( + id = message_id, + sender_email = sender_email, + sender_full_name = sender_full_name, + sender_short_name = sender_short_name, + sender_domain = sender_realm_domain, + sender_id = sender_id, + type = display_type, + display_recipient = display_recipient, + recipient_id = recipient_id, + subject = subject, + timestamp = datetime_to_timestamp(pub_date), + gravatar_hash = gravatar_hash(sender_email), # Deprecated June 2013 + avatar_url = avatar_url, + client = sending_client_name) + + obj['subject_links'] = bugdown.subject_links(sender_realm_domain.lower(), subject) + + if last_edit_time != None: + obj['last_edit_timestamp'] = datetime_to_timestamp(last_edit_time) + obj['edit_history'] = ujson.loads(edit_history) + + if apply_markdown: + if Message.need_to_render_content(rendered_content, rendered_content_version, bugdown.version): + if message is None: + # We really shouldn't be rendering objects in this method, but there is + # a scenario where we upgrade the version of bugdown and fail to run + # management commands to re-render historical messages, and then we + # need to have side effects. This method is optimized to not need full + # blown ORM objects, but the bugdown renderer is unfortunately highly + # coupled to Message, and we also need to persist the new rendered content. + # If we don't have a message object passed in, we get one here. The cost + # of going to the DB here should be overshadowed by the cost of rendering + # and updating the row. + # TODO: see #1379 to eliminate bugdown dependencies + message = Message.objects.select_related().get(id=message_id) + + # It's unfortunate that we need to have side effects on the message + # in some cases. + rendered_content = message.render_markdown(content, sender_realm_domain) + message.set_rendered_content(rendered_content, True) + + if rendered_content is not None: + obj['content'] = rendered_content + else: + obj['content'] = u'

[Zulip note: Sorry, we could not understand the formatting of your message]

' + + obj['content_type'] = 'text/html' + else: + obj['content'] = content + obj['content_type'] = 'text/x-markdown' + + return obj + diff --git a/zerver/models.py b/zerver/models.py index 33dd7093b9..bbfd280971 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -17,11 +17,10 @@ from zerver.lib.cache import cache_with_key, flush_user_profile, flush_realm, \ display_recipient_cache_key, cache_delete, \ get_stream_cache_key, active_user_dicts_in_realm_cache_key, \ active_bot_dicts_in_realm_cache_key, active_user_dict_fields, \ - active_bot_dict_fields, flush_message, to_dict_cache_key + active_bot_dict_fields, flush_message from zerver.lib.utils import make_safe_digest, generate_random_token -from zerver.lib.str_utils import force_bytes, ModelReprMixin, dict_with_str_keys +from zerver.lib.str_utils import ModelReprMixin from django.db import transaction -from zerver.lib.avatar_hash import gravatar_hash from zerver.lib.camo import get_camo_url from django.utils import timezone from django.contrib.sessions.models import Session @@ -29,7 +28,6 @@ from zerver.lib.timestamp import datetime_to_timestamp from django.db.models.signals import pre_save, post_save, post_delete from django.core.validators import MinLengthValidator, RegexValidator from django.utils.translation import ugettext_lazy as _ -import zlib from bitfield import BitField from bitfield.types import BitHandler @@ -37,9 +35,8 @@ from collections import defaultdict from datetime import timedelta import pylibmc import re -import ujson import logging -from six import binary_type, text_type +from six import text_type import time import datetime @@ -774,14 +771,6 @@ def bulk_get_recipients(type, type_ids): return generic_bulk_cached_fetch(cache_key_function, query_function, type_ids, id_fetcher=lambda recipient: recipient.type_id) -def extract_message_dict(message_bytes): - # type: (binary_type) -> Dict[str, Any] - return dict_with_str_keys(ujson.loads(zlib.decompress(message_bytes).decode("utf-8"))) - -def stringify_message_dict(message_dict): - # type: (Dict[str, Any]) -> binary_type - return zlib.compress(force_bytes(ujson.dumps(message_dict))) - class Message(ModelReprMixin, models.Model): sender = models.ForeignKey(UserProfile) # type: UserProfile recipient = models.ForeignKey(Recipient) # type: Recipient @@ -898,199 +887,17 @@ class Message(ModelReprMixin, models.Model): import zerver.lib.bugdown as bugdown # 'from zerver.lib import bugdown' gives mypy error in python 3 mode. - if Message.need_to_render_content(self.rendered_content, self.rendered_content_version): + if Message.need_to_render_content(self.rendered_content, + self.rendered_content_version, + bugdown.version): return self.set_rendered_content(self.render_markdown(self.content, domain), save) else: return True @staticmethod - def need_to_render_content(rendered_content, rendered_content_version): - # type: (Optional[text_type], int) -> bool - return rendered_content is None or rendered_content_version < bugdown.version - - def to_dict(self, apply_markdown): - # type: (bool) -> Dict[str, Any] - return extract_message_dict(self.to_dict_json(apply_markdown)) - - @cache_with_key(to_dict_cache_key, timeout=3600*24) - def to_dict_json(self, apply_markdown): - # type: (bool) -> binary_type - return self.to_dict_uncached(apply_markdown) - - def to_dict_uncached(self, apply_markdown): - # type: (bool) -> binary_type - return stringify_message_dict(self.to_dict_uncached_helper(apply_markdown)) - - def to_dict_uncached_helper(self, apply_markdown): - # type: (bool) -> Dict[str, Any] - return Message.build_message_dict( - apply_markdown = apply_markdown, - message = self, - message_id = self.id, - last_edit_time = self.last_edit_time, - edit_history = self.edit_history, - content = self.content, - subject = self.subject, - pub_date = self.pub_date, - rendered_content = self.rendered_content, - rendered_content_version = self.rendered_content_version, - sender_id = self.sender.id, - sender_email = self.sender.email, - sender_realm_domain = self.sender.realm.domain, - sender_full_name = self.sender.full_name, - sender_short_name = self.sender.short_name, - sender_avatar_source = self.sender.avatar_source, - sender_is_mirror_dummy = self.sender.is_mirror_dummy, - sending_client_name = self.sending_client.name, - recipient_id = self.recipient.id, - recipient_type = self.recipient.type, - recipient_type_id = self.recipient.type_id, - ) - - @staticmethod - def build_dict_from_raw_db_row(row, apply_markdown): - # type: (Dict[str, Any], bool) -> Dict[str, Any] - ''' - row is a row from a .values() call, and it needs to have - all the relevant fields populated - ''' - return Message.build_message_dict( - apply_markdown = apply_markdown, - message = None, - message_id = row['id'], - last_edit_time = row['last_edit_time'], - edit_history = row['edit_history'], - content = row['content'], - subject = row['subject'], - pub_date = row['pub_date'], - rendered_content = row['rendered_content'], - rendered_content_version = row['rendered_content_version'], - sender_id = row['sender_id'], - sender_email = row['sender__email'], - sender_realm_domain = row['sender__realm__domain'], - sender_full_name = row['sender__full_name'], - sender_short_name = row['sender__short_name'], - sender_avatar_source = row['sender__avatar_source'], - sender_is_mirror_dummy = row['sender__is_mirror_dummy'], - sending_client_name = row['sending_client__name'], - recipient_id = row['recipient_id'], - recipient_type = row['recipient__type'], - recipient_type_id = row['recipient__type_id'], - ) - - @staticmethod - def build_message_dict( - apply_markdown, - message, - message_id, - last_edit_time, - edit_history, - content, - subject, - pub_date, - rendered_content, - rendered_content_version, - sender_id, - sender_email, - sender_realm_domain, - sender_full_name, - sender_short_name, - sender_avatar_source, - sender_is_mirror_dummy, - sending_client_name, - recipient_id, - recipient_type, - recipient_type_id, - ): - # type: (bool, Message, int, datetime.datetime, text_type, text_type, text_type, datetime.datetime, text_type, Optional[int], int, text_type, text_type, text_type, text_type, text_type, bool, text_type, int, int, int) -> Dict[str, Any] - # TODO: see #1379 to eliminate bugdown dependencies - global bugdown - if bugdown is None: - import zerver.lib.bugdown as bugdown - # 'from zerver.lib import bugdown' gives mypy error in python 3 mode. - - # TODO: Remove this import cycle - from zerver.lib.avatar import get_avatar_url - avatar_url = get_avatar_url(sender_avatar_source, sender_email) - - display_recipient = get_display_recipient_by_id( - recipient_id, - recipient_type, - recipient_type_id - ) - - if recipient_type == Recipient.STREAM: - display_type = "stream" - elif recipient_type in (Recipient.HUDDLE, Recipient.PERSONAL): - assert not isinstance(display_recipient, text_type) - display_type = "private" - if len(display_recipient) == 1: - # add the sender in if this isn't a message between - # someone and his self, preserving ordering - recip = {'email': sender_email, - 'domain': sender_realm_domain, - 'full_name': sender_full_name, - 'short_name': sender_short_name, - 'id': sender_id, - 'is_mirror_dummy': sender_is_mirror_dummy} - if recip['email'] < display_recipient[0]['email']: - display_recipient = [recip, display_recipient[0]] - elif recip['email'] > display_recipient[0]['email']: - display_recipient = [display_recipient[0], recip] - - obj = dict( - id = message_id, - sender_email = sender_email, - sender_full_name = sender_full_name, - sender_short_name = sender_short_name, - sender_domain = sender_realm_domain, - sender_id = sender_id, - type = display_type, - display_recipient = display_recipient, - recipient_id = recipient_id, - subject = subject, - timestamp = datetime_to_timestamp(pub_date), - gravatar_hash = gravatar_hash(sender_email), # Deprecated June 2013 - avatar_url = avatar_url, - client = sending_client_name) - - obj['subject_links'] = bugdown.subject_links(sender_realm_domain.lower(), subject) - - if last_edit_time != None: - obj['last_edit_timestamp'] = datetime_to_timestamp(last_edit_time) - obj['edit_history'] = ujson.loads(edit_history) - - if apply_markdown: - if Message.need_to_render_content(rendered_content, rendered_content_version): - if message is None: - # We really shouldn't be rendering objects in this method, but there is - # a scenario where we upgrade the version of bugdown and fail to run - # management commands to re-render historical messages, and then we - # need to have side effects. This method is optimized to not need full - # blown ORM objects, but the bugdown renderer is unfortunately highly - # coupled to Message, and we also need to persist the new rendered content. - # If we don't have a message object passed in, we get one here. The cost - # of going to the DB here should be overshadowed by the cost of rendering - # and updating the row. - # TODO: see #1379 to eliminate bugdown dependencies - message = Message.objects.select_related().get(id=message_id) - - # It's unfortunate that we need to have side effects on the message - # in some cases. - rendered_content = message.render_markdown(content, sender_realm_domain) - message.set_rendered_content(rendered_content, True) - - if rendered_content is not None: - obj['content'] = rendered_content - else: - obj['content'] = u'

[Zulip note: Sorry, we could not understand the formatting of your message]

' - - obj['content_type'] = 'text/html' - else: - obj['content'] = content - obj['content_type'] = 'text/x-markdown' - - return obj + def need_to_render_content(rendered_content, rendered_content_version, bugdown_version): + # type: (Optional[text_type], int, int) -> bool + return rendered_content is None or rendered_content_version < bugdown_version def to_log_dict(self): # type: () -> Dict[str, Any] diff --git a/zerver/tests/test_messages.py b/zerver/tests/test_messages.py index bf64c5c06d..40b105ae37 100644 --- a/zerver/tests/test_messages.py +++ b/zerver/tests/test_messages.py @@ -9,6 +9,11 @@ from zerver.decorator import JsonableError from zerver.lib.test_runner import slow from zilencer.models import Deployment +from zerver.lib.message import ( + MessageDict, + message_to_dict, +) + from zerver.lib.test_helpers import ( ZulipTestCase, get_user_messages, @@ -464,7 +469,7 @@ class MessageDictTest(ZulipTestCase): rows = list(Message.get_raw_db_rows(ids)) for row in rows: - Message.build_dict_from_raw_db_row(row, False) + MessageDict.build_dict_from_raw_db_row(row, False) delay = time.time() - t # Make sure we don't take longer than 1ms per message to extract messages. @@ -493,7 +498,7 @@ class MessageDictTest(ZulipTestCase): # An important part of this test is to get the message through this exact code path, # because there is an ugly hack we need to cover. So don't just say "row = message". row = Message.get_raw_db_rows([message.id])[0] - dct = Message.build_dict_from_raw_db_row(row, apply_markdown=True) + dct = MessageDict.build_dict_from_raw_db_row(row, apply_markdown=True) expected_content = '

hello world

' self.assertEqual(dct['content'], expected_content) message = Message.objects.get(id=message.id) @@ -789,8 +794,8 @@ class EditMessageTest(ZulipTestCase): def check_message(self, msg_id, subject=None, content=None): # type: (int, Optional[text_type], Optional[text_type]) -> Message msg = Message.objects.get(id=msg_id) - cached = msg.to_dict(False) - uncached = msg.to_dict_uncached_helper(False) + cached = message_to_dict(msg, False) + uncached = MessageDict.to_dict_uncached_helper(msg, False) self.assertEqual(cached, uncached) if subject: self.assertEqual(msg.topic_name(), subject) diff --git a/zerver/tests/test_narrow.py b/zerver/tests/test_narrow.py index 1d75a22107..ab8b7e85da 100644 --- a/zerver/tests/test_narrow.py +++ b/zerver/tests/test_narrow.py @@ -13,6 +13,9 @@ from zerver.models import ( get_display_recipient, get_recipient, get_realm, get_stream, get_user_profile_by_email, ) from zerver.lib.actions import create_stream_if_needed, do_add_subscription +from zerver.lib.message import ( + MessageDict, +) from zerver.lib.narrow import ( build_narrow_filter, ) @@ -816,7 +819,7 @@ class GetOldMessagesTest(ZulipTestCase): m.rendered_content = m.rendered_content_version = None m.content = 'test content' # Use to_dict_uncached_helper directly to avoid having to deal with remote cache - d = m.to_dict_uncached_helper(True) + d = MessageDict.to_dict_uncached_helper(m, True) self.assertEqual(d['content'], '

test content

') def common_check_get_old_messages_query(self, query_params, expected): diff --git a/zerver/views/messages.py b/zerver/views/messages.py index 7f63dbb8fa..4e9fbdb14e 100644 --- a/zerver/views/messages.py +++ b/zerver/views/messages.py @@ -25,6 +25,11 @@ from zerver.lib.cache import ( generic_bulk_cached_fetch, to_dict_cache_key_id, ) +from zerver.lib.message import ( + MessageDict, + extract_message_dict, + stringify_message_dict, +) from zerver.lib.response import json_success, json_error from zerver.lib.sqlalchemy_utils import get_sqlalchemy_connection from zerver.lib.utils import statsd @@ -33,8 +38,7 @@ from zerver.lib.validator import \ from zerver.models import Message, UserProfile, Stream, Subscription, \ Realm, Recipient, UserMessage, bulk_get_recipients, get_recipient, \ get_user_profile_by_email, get_stream, \ - parse_usermessage_flags, extract_message_dict, \ - stringify_message_dict, \ + parse_usermessage_flags, \ resolve_email_to_domain, get_realm, get_active_streams, \ bulk_get_streams, get_user_profile_by_id @@ -636,7 +640,7 @@ def get_old_messages_backend(request, user_profile, search_fields[message_id] = get_search_fields(rendered_content, subject, content_matches, subject_matches) - cache_transformer = lambda row: Message.build_dict_from_raw_db_row(row, apply_markdown) + cache_transformer = lambda row: MessageDict.build_dict_from_raw_db_row(row, apply_markdown) id_fetcher = lambda row: row['id'] message_dicts = generic_bulk_cached_fetch(lambda message_id: to_dict_cache_key_id(message_id, apply_markdown),