From 11a1cb9630b2a723fdd36cbd07992665a51b1bd2 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Thu, 21 Sep 2023 19:52:56 +0000 Subject: [PATCH] narrow: Skip the realm_id limit on joins across to usermessage. The unique index on `(user_id, message_id)` that is the `zerver_usermessage` table is rather specific, and even the PostgreSQL extended statistics are not enough for it to realize there is a correlation between the `realm_id` in the message table and the `user_id` in the usermessage table. This means that adding the `realm_id` limit when there is a join to `zerver_usermessage` flips the query plan from a nested loop of unique usermessage index-only scan, with an index scan of the messages pkey -- to a parallel hash join of the messages limit with a index scan of just the user_id limit on usermessages. It thinks this is necessary because it thinks that the `realm_id` limit may remove a large number of messages from the usermessage set -- which is totally untrue. Remove the `realm_id` limit if we have a usermessage join. --- zerver/lib/narrow.py | 11 ++++++----- zerver/tests/test_message_fetch.py | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/zerver/lib/narrow.py b/zerver/lib/narrow.py index 66d35cc068..b0eeb35d2b 100644 --- a/zerver/lib/narrow.py +++ b/zerver/lib/narrow.py @@ -912,13 +912,12 @@ def get_base_query_for_search( realm_id: int, user_profile: Optional[UserProfile], need_message: bool, need_user_message: bool ) -> Tuple[Select, ColumnElement[Integer]]: # Handle the simple case where user_message isn't involved first. - realm_cond = column("realm_id", Integer) == literal(realm_id) if not need_user_message: assert need_message query = ( select(column("id", Integer).label("message_id")) .select_from(table("zerver_message")) - .where(realm_cond) + .where(column("realm_id", Integer) == literal(realm_id)) ) inner_msg_id_col = literal_column("zerver_message.id", Integer) @@ -928,9 +927,11 @@ def get_base_query_for_search( if need_message: query = ( select(column("message_id", Integer), column("flags", Integer)) - .where(realm_cond) - .where(column("user_profile_id", Integer) == literal(user_profile.id)) - .select_from( + # We don't limit by realm_id despite the join to + # zerver_messages, since the user_profile_id limit in + # usermessage is more selective, and the query planner + # can't know about that cross-table correlation. + .where(column("user_profile_id", Integer) == literal(user_profile.id)).select_from( join( table("zerver_usermessage"), table("zerver_message"), diff --git a/zerver/tests/test_message_fetch.py b/zerver/tests/test_message_fetch.py index 72048e12ea..bf4b16b53d 100644 --- a/zerver/tests/test_message_fetch.py +++ b/zerver/tests/test_message_fetch.py @@ -3945,7 +3945,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\ hamlet_email = self.example_user("hamlet").email othello_email = self.example_user("othello").email - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) AND message_id = 0) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) AND message_id = 0) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( { @@ -3957,7 +3957,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\ sql, ) - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) AND message_id = 0) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) AND message_id = 0) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( { @@ -3969,7 +3969,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\ sql, ) - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (sender_id = {othello_id} AND recipient_id = {hamlet_recipient} OR sender_id = {hamlet_id} AND recipient_id = {othello_recipient}) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( { @@ -3981,13 +3981,13 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\ sql, ) - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( {"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["is", "starred"]]'}, sql ) - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND sender_id = {othello_id} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND sender_id = {othello_id} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( { @@ -4012,7 +4012,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\ {"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["streams", "public"]]'}, sql ) - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (recipient_id NOT IN ({public_streams_recipients})) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND (recipient_id NOT IN ({public_streams_recipients})) ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( { @@ -4024,7 +4024,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\ sql, ) - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND upper(subject) = upper('blah') ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND upper(subject) = upper('blah') ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( {"anchor": 0, "num_before": 0, "num_after": 9, "narrow": '[["topic", "blah"]]'}, sql @@ -4043,7 +4043,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\ ) # Narrow to direct messages with yourself - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND sender_id = {hamlet_id} AND recipient_id = {hamlet_recipient} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND sender_id = {hamlet_id} AND recipient_id = {hamlet_recipient} ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( { @@ -4055,7 +4055,7 @@ recipient_id = %(recipient_id_3)s AND upper(subject) = upper(%(param_2)s))\ sql, ) - sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND recipient_id = {scotland_recipient} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" + sql_template = "SELECT anon_1.message_id, anon_1.flags \nFROM (SELECT message_id, flags \nFROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \nWHERE user_profile_id = {hamlet_id} AND recipient_id = {scotland_recipient} AND (flags & 2) != 0 ORDER BY message_id ASC \n LIMIT 10) AS anon_1 ORDER BY message_id ASC" sql = sql_template.format(**query_ids) self.common_check_get_messages_query( { @@ -4079,7 +4079,7 @@ FROM unnest(string_to_array(ts_headline('zulip.english_us_search', rendered_cont FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', 'jumping'), 'HighlightAll = TRUE, StartSel = , StopSel = '), '')) AS anon_5\n\ LIMIT ALL OFFSET 1)) AS topic_matches \n\ FROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \n\ -WHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY message_id ASC \n\ +WHERE user_profile_id = {hamlet_id} AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', 'jumping')) ORDER BY message_id ASC \n\ LIMIT 10) AS anon_1 ORDER BY message_id ASC\ """ sql = sql_template.format(**query_ids) @@ -4117,7 +4117,7 @@ FROM unnest(string_to_array(ts_headline('zulip.english_us_search', rendered_cont FROM unnest(string_to_array(ts_headline('zulip.english_us_search', escape_html(subject), plainto_tsquery('zulip.english_us_search', '"jumping" quickly'), 'HighlightAll = TRUE, StartSel = , StopSel = '), '')) AS anon_5\n\ LIMIT ALL OFFSET 1)) AS topic_matches \n\ FROM zerver_usermessage JOIN zerver_message ON zerver_usermessage.message_id = zerver_message.id \n\ -WHERE realm_id = 2 AND user_profile_id = {hamlet_id} AND (content ILIKE '%jumping%' OR subject ILIKE '%jumping%') AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', '"jumping" quickly')) ORDER BY message_id ASC \n\ +WHERE user_profile_id = {hamlet_id} AND (content ILIKE '%jumping%' OR subject ILIKE '%jumping%') AND (search_tsvector @@ plainto_tsquery('zulip.english_us_search', '"jumping" quickly')) ORDER BY message_id ASC \n\ LIMIT 10) AS anon_1 ORDER BY message_id ASC\ """ sql = sql_template.format(**query_ids)