user exports: Chunkify messages in sorted order.

This accomplishes a few things:

    * It extracts `chunkify` rather than having us
      clumsily track chunking-related stuff in a
      big loop that is doing other stuff.

    * It makes it so that all message ids
      in message-000001.json < message-000002.json.

    * It makes it easier for us to customize
      the messages we send to a single user
      (coming soon).

BTW we probably have a slicker version of chunkify
somewhere in our codebase, but I couldn't remember
where.
This commit is contained in:
Steve Howell 2021-12-09 15:17:58 +00:00 committed by Tim Abbott
parent 2a73964e16
commit 8ea320812f

View File

@ -2038,21 +2038,42 @@ def get_single_user_config() -> Config:
return user_profile_config
def chunkify(lst: List[int], chunk_size: int) -> List[List[int]]:
# chunkify([1,2,3,4,5], 2) == [[1,2], [3,4], [5]]
result = []
i = 0
while True:
chunk = lst[i : i + chunk_size]
if len(chunk) == 0:
break
else:
result.append(chunk)
i += chunk_size
return result
def export_messages_single_user(
user_profile: UserProfile, output_dir: Path, chunk_size: int = MESSAGE_BATCH_CHUNK_SIZE
) -> None:
user_message_query = UserMessage.objects.filter(user_profile=user_profile).order_by("id")
min_id = -1
dump_file_id = 1
while True:
actual_query = user_message_query.select_related(
"message", "message__sending_client"
).filter(id__gt=min_id)[0:chunk_size]
user_message_chunk = list(actual_query)
user_message_ids = {um.id for um in user_message_chunk}
if len(user_message_chunk) == 0:
break
# Do a slim query to find all message ids that pertain to us.
# TODO: be more selective about which message ids we export.
all_message_ids = (
UserMessage.objects.filter(user_profile=user_profile)
.values_list("message_id", flat=True)
.order_by("message_id")
)
dump_file_id = 1
for message_id_chunk in chunkify(all_message_ids, chunk_size):
fat_query = (
UserMessage.objects.select_related("message", "message__sending_client")
.filter(user_profile=user_profile, message_id__in=message_id_chunk)
.order_by("message_id")
)
user_message_chunk = list(fat_query)
message_chunk = []
for user_message in user_message_chunk:
@ -2071,7 +2092,6 @@ def export_messages_single_user(
floatify_datetime_fields(output, "zerver_message")
write_table_data(message_filename, output)
min_id = max(user_message_ids)
dump_file_id += 1