- Import an existing Slack, Mattermost, HipChat, Stride, - or Gitter workspace into Zulip. + Import an existing Slack, Mattermost or Gitter workspace into Zulip.
diff --git a/templates/zerver/help/about-streams-and-topics.md b/templates/zerver/help/about-streams-and-topics.md index e48ad741f9..a74244a2a1 100644 --- a/templates/zerver/help/about-streams-and-topics.md +++ b/templates/zerver/help/about-streams-and-topics.md @@ -7,7 +7,7 @@ message is about. |---|---|--- | Zulip | Stream | Topic | Email | Mailing list | Subject line -| Slack/IRC/HipChat | Channel/Room | - +| Slack/IRC | Channel/Room | - Messages with the same stream and topic are shown together as a conversational thread. Here is what it looks like in Zulip. diff --git a/templates/zerver/why-zulip.md b/templates/zerver/why-zulip.md index e61b897414..496fdbe578 100644 --- a/templates/zerver/why-zulip.md +++ b/templates/zerver/why-zulip.md @@ -1,7 +1,7 @@ There are a lot of team chat apps. So why did we build Zulip? We talk about Slack in the discussion below, but the problems apply equally -to other apps with Slack’s conversation model, including HipChat, IRC, +to other apps with Slack’s conversation model, including IRC, Mattermost, Discord, Spark, and others. ## Reading busy Slack channels is extremely inefficient. diff --git a/tools/test-backend b/tools/test-backend index 94a8f78cda..a19463d22e 100755 --- a/tools/test-backend +++ b/tools/test-backend @@ -120,7 +120,6 @@ not_yet_fully_covered = [ 'zerver/tornado/sharding.py', 'zerver/tornado/views.py', # Data import files; relatively low priority - 'zerver/data_import/hipchat*.py', 'zerver/data_import/sequencer.py', 'zerver/data_import/slack.py', 'zerver/data_import/gitter.py', diff --git a/version.py b/version.py index 7e0ec4e286..655d81fefa 100644 --- a/version.py +++ b/version.py @@ -43,4 +43,4 @@ API_FEATURE_LEVEL = 36 # historical commits sharing the same major version, in which case a # minor version bump suffices. 
-PROVISION_VERSION = '119.0' +PROVISION_VERSION = '120.0' diff --git a/zerver/data_import/hipchat.py b/zerver/data_import/hipchat.py deleted file mode 100755 index 15e61f3993..0000000000 --- a/zerver/data_import/hipchat.py +++ /dev/null @@ -1,882 +0,0 @@ -import base64 -import glob -import logging -import os -import re -import shutil -import subprocess -from typing import Any, Callable, Dict, List, Optional, Set - -import dateutil -import hypchat -import orjson -from django.conf import settings -from django.utils.timezone import now as timezone_now - -from zerver.data_import.hipchat_attachment import AttachmentHandler -from zerver.data_import.hipchat_user import UserHandler -from zerver.data_import.import_util import ( - SubscriberHandler, - build_message, - build_personal_subscriptions, - build_public_stream_subscriptions, - build_realm, - build_realm_emoji, - build_recipients, - build_stream, - build_stream_subscriptions, - build_user_profile, - build_zerver_realm, - create_converted_data_files, - make_subscriber_map, - make_user_messages, - write_avatar_png, -) -from zerver.data_import.sequencer import NEXT_ID, IdMapper -from zerver.lib.utils import process_list_in_batches -from zerver.models import RealmEmoji, Recipient, UserProfile - -# stubs -ZerverFieldsT = Dict[str, Any] - -def str_date_to_float(date_str: str) -> float: - ''' - Dates look like this: - - "2018-08-08T14:23:54Z 626267" - ''' - - parts = date_str.split(' ') - time_str = parts[0].replace('T', ' ') - date_time = dateutil.parser.parse(time_str) - timestamp = date_time.timestamp() - if len(parts) == 2: - microseconds = int(parts[1]) - timestamp += microseconds / 1000000.0 - return timestamp - -def untar_input_file(tar_file: str) -> str: - data_dir = tar_file.replace('.tar', '') - data_dir = os.path.abspath(data_dir) - - if os.path.exists(data_dir): - logging.info('input data was already untarred to %s, we will use it', data_dir) - return data_dir - - os.makedirs(data_dir) - - 
subprocess.check_call(['tar', '-xf', tar_file, '-C', data_dir]) - - logging.info('input data was untarred to %s', data_dir) - - return data_dir - -def read_user_data(data_dir: str) -> List[ZerverFieldsT]: - fn = 'users.json' - data_file = os.path.join(data_dir, fn) - with open(data_file, "rb") as fp: - return orjson.loads(fp.read()) - -def convert_user_data(user_handler: UserHandler, - slim_mode: bool, - user_id_mapper: IdMapper, - raw_data: List[ZerverFieldsT], - realm_id: int) -> None: - flat_data = [ - d['User'] - for d in raw_data - ] - - def process(in_dict: ZerverFieldsT) -> ZerverFieldsT: - delivery_email = in_dict['email'] - email = in_dict['email'] - full_name = in_dict['name'] - id = user_id_mapper.get(in_dict['id']) - is_mirror_dummy = False - short_name = in_dict['mention_name'] - timezone = in_dict['timezone'] - - role = UserProfile.ROLE_MEMBER - if in_dict['account_type'] == 'admin': - role = UserProfile.ROLE_REALM_ADMINISTRATOR - if in_dict['account_type'] == 'guest': - role = UserProfile.ROLE_GUEST - - date_joined = int(timezone_now().timestamp()) - is_active = not in_dict['is_deleted'] - - if not email: - if role == UserProfile.ROLE_GUEST: - # HipChat guest users don't have emails, so - # we just fake them. - email = f'guest-{id}@example.com' - delivery_email = email - else: - # HipChat sometimes doesn't export an email for deactivated users. - assert not is_active - email = delivery_email = f"deactivated-{id}@example.com" - - # unmapped fields: - # title - Developer, Project Manager, etc. 
- # rooms - no good sample data - # created - we just use "now" - # roles - we just use account_type - - if in_dict.get('avatar'): - avatar_source = 'U' - else: - avatar_source = 'G' - - return build_user_profile( - avatar_source=avatar_source, - date_joined=date_joined, - delivery_email=delivery_email, - email=email, - full_name=full_name, - id=id, - is_active=is_active, - role=role, - is_mirror_dummy=is_mirror_dummy, - realm_id=realm_id, - short_name=short_name, - timezone=timezone, - ) - - for raw_item in flat_data: - user = process(raw_item) - user_handler.add_user(user) - -def convert_avatar_data(avatar_folder: str, - raw_data: List[ZerverFieldsT], - user_id_mapper: IdMapper, - realm_id: int) -> List[ZerverFieldsT]: - ''' - This code is pretty specific to how HipChat sends us data. - They give us the avatar payloads in base64 in users.json. - - We process avatars in our own pass of that data, rather - than doing it while we're getting other user data. I - chose to keep this separate, as otherwise you have a lot - of extraneous data getting passed around. - - This code has MAJOR SIDE EFFECTS--namely writing a bunch - of files to the avatars directory. 
- ''' - - avatar_records = [] - - for d in raw_data: - raw_user = d['User'] - avatar_payload = raw_user.get('avatar') - if not avatar_payload: - continue - - bits = base64.b64decode(avatar_payload) - - raw_user_id = raw_user['id'] - if not user_id_mapper.has(raw_user_id): - continue - - user_id = user_id_mapper.get(raw_user_id) - - metadata = write_avatar_png( - avatar_folder=avatar_folder, - realm_id=realm_id, - user_id=user_id, - bits=bits, - ) - avatar_records.append(metadata) - - return avatar_records - -def read_room_data(data_dir: str) -> List[ZerverFieldsT]: - fn = 'rooms.json' - data_file = os.path.join(data_dir, fn) - with open(data_file, "rb") as f: - data = orjson.loads(f.read()) - return data - -def convert_room_data(raw_data: List[ZerverFieldsT], - subscriber_handler: SubscriberHandler, - stream_id_mapper: IdMapper, - user_id_mapper: IdMapper, - realm_id: int, - api_token: Optional[str]=None) -> List[ZerverFieldsT]: - flat_data = [ - d['Room'] - for d in raw_data - ] - - def get_invite_only(v: str) -> bool: - if v == 'public': - return False - elif v == 'private': - return True - else: - raise Exception('unexpected value') - - streams = [] - - for in_dict in flat_data: - now = int(timezone_now().timestamp()) - stream_id = stream_id_mapper.get(in_dict['id']) - - invite_only = get_invite_only(in_dict['privacy']) - - stream = build_stream( - date_created=now, - realm_id=realm_id, - name=in_dict['name'], - description=in_dict['topic'], - stream_id=stream_id, - deactivated=in_dict['is_archived'], - invite_only=invite_only, - ) - - if invite_only: - users: Set[int] = { - user_id_mapper.get(key) - for key in in_dict['members'] - if user_id_mapper.has(key) - } - - if user_id_mapper.has(in_dict['owner']): - owner = user_id_mapper.get(in_dict['owner']) - users.add(owner) - else: - users = set() - if api_token is not None: - hc = hypchat.HypChat(api_token) - room_data = hc.fromurl('{}/v2/room/{}/member'.format(hc.endpoint, in_dict['id'])) - - for item in 
room_data['items']: - hipchat_user_id = item['id'] - zulip_user_id = user_id_mapper.get(hipchat_user_id) - users.add(zulip_user_id) - - if users: - subscriber_handler.set_info( - stream_id=stream_id, - users=users, - ) - - # unmapped fields: - # guest_access_url: no Zulip equivalent - # created: we just use "now" - # participants: no good sample data - - streams.append(stream) - - return streams - -def make_realm(realm_id: int) -> ZerverFieldsT: - NOW = float(timezone_now().timestamp()) - domain_name = settings.EXTERNAL_HOST - realm_subdomain = "" - zerver_realm = build_zerver_realm(realm_id, realm_subdomain, NOW, 'HipChat') - realm = build_realm(zerver_realm, realm_id, domain_name) - - # We may override these later. - realm['zerver_defaultstream'] = [] - - return realm - -def write_avatar_data(raw_user_data: List[ZerverFieldsT], - output_dir: str, - user_id_mapper: IdMapper, - realm_id: int) -> None: - avatar_folder = os.path.join(output_dir, 'avatars') - avatar_realm_folder = os.path.join(avatar_folder, str(realm_id)) - os.makedirs(avatar_realm_folder, exist_ok=True) - - avatar_records = convert_avatar_data( - avatar_folder=avatar_folder, - raw_data=raw_user_data, - user_id_mapper=user_id_mapper, - realm_id=realm_id, - ) - - create_converted_data_files(avatar_records, output_dir, '/avatars/records.json') - -def write_emoticon_data(realm_id: int, - data_dir: str, - output_dir: str) -> List[ZerverFieldsT]: - ''' - This function does most of the work for processing emoticons, the bulk - of which is copying files. We also write a json file with metadata. - Finally, we return a list of RealmEmoji dicts to our caller. 
- - In our data_dir we have a pretty simple setup: - - emoticons.json - has very simple metadata on emojis: - - { - "Emoticon": { - "id": 9875487, - "path": "emoticons/yasss.jpg", - "shortcut": "yasss" - } - }, - { - "Emoticon": { - "id": 718017, - "path": "emoticons/yayyyyy.gif", - "shortcut": "yayyyyy" - } - } - - emoticons/ - contains a bunch of image files: - - slytherinsnake.gif - spanishinquisition.jpg - sparkle.png - spiderman.gif - stableparrot.gif - stalkerparrot.gif - supergirl.png - superman.png - - We move all the relevant files to Zulip's more nested - directory structure. - ''' - - logging.info('Starting to process emoticons') - - fn = 'emoticons.json' - data_file = os.path.join(data_dir, fn) - if not os.path.exists(data_file): - logging.warning("HipChat export does not contain emoticons.json.") - logging.warning("As a result, custom emoji cannot be imported.") - return [] - - with open(data_file, "rb") as f: - data = orjson.loads(f.read()) - - if isinstance(data, dict) and 'Emoticons' in data: - # Handle the hc-migrate export format for emoticons.json. 
- flat_data = [ - dict( - path=d['path'], - name=d['shortcut'], - ) - for d in data['Emoticons'] - ] - else: - flat_data = [ - dict( - path=d['Emoticon']['path'], - name=d['Emoticon']['shortcut'], - ) - for d in data - ] - - emoji_folder = os.path.join(output_dir, 'emoji') - os.makedirs(emoji_folder, exist_ok=True) - - def process(data: ZerverFieldsT) -> ZerverFieldsT: - source_sub_path = data['path'] - source_fn = os.path.basename(source_sub_path) - source_path = os.path.join(data_dir, source_sub_path) - - # Use our template from RealmEmoji - # PATH_ID_TEMPLATE = "{realm_id}/emoji/images/{emoji_file_name}" - target_fn = source_fn - target_sub_path = RealmEmoji.PATH_ID_TEMPLATE.format( - realm_id=realm_id, - emoji_file_name=target_fn, - ) - target_path = os.path.join(emoji_folder, target_sub_path) - - os.makedirs(os.path.dirname(target_path), exist_ok=True) - - source_path = os.path.abspath(source_path) - target_path = os.path.abspath(target_path) - - shutil.copyfile(source_path, target_path) - - return dict( - path=target_path, - s3_path=target_path, - file_name=target_fn, - realm_id=realm_id, - name=data['name'], - ) - - emoji_records = list(map(process, flat_data)) - create_converted_data_files(emoji_records, output_dir, '/emoji/records.json') - - realmemoji = [ - build_realm_emoji( - realm_id=realm_id, - name=rec['name'], - id=NEXT_ID('realmemoji'), - file_name=rec['file_name'], - ) - for rec in emoji_records - ] - logging.info('Done processing emoticons') - - return realmemoji - -def write_message_data(realm_id: int, - slim_mode: bool, - message_key: str, - zerver_recipient: List[ZerverFieldsT], - subscriber_map: Dict[int, Set[int]], - data_dir: str, - output_dir: str, - masking_content: bool, - stream_id_mapper: IdMapper, - user_id_mapper: IdMapper, - user_handler: UserHandler, - attachment_handler: AttachmentHandler) -> None: - - stream_id_to_recipient_id = { - d['type_id']: d['id'] - for d in zerver_recipient - if d['type'] == Recipient.STREAM - } - - 
user_id_to_recipient_id = { - d['type_id']: d['id'] - for d in zerver_recipient - if d['type'] == Recipient.PERSONAL - } - - def get_stream_recipient_id(raw_message: ZerverFieldsT) -> int: - fn_id = raw_message['fn_id'] - stream_id = stream_id_mapper.get(fn_id) - recipient_id = stream_id_to_recipient_id[stream_id] - return recipient_id - - def get_pm_recipient_id(raw_message: ZerverFieldsT) -> int: - raw_user_id = raw_message['receiver_id'] - assert(raw_user_id) - user_id = user_id_mapper.get(raw_user_id) - recipient_id = user_id_to_recipient_id[user_id] - return recipient_id - - if message_key in ['UserMessage', 'NotificationMessage']: - is_pm_data = False - dir_glob = os.path.join(data_dir, 'rooms', '*', 'history.json') - get_recipient_id = get_stream_recipient_id - get_files_dir = lambda fn_id: os.path.join(data_dir, 'rooms', str(fn_id), 'files') - - elif message_key == 'PrivateUserMessage': - is_pm_data = True - dir_glob = os.path.join(data_dir, 'users', '*', 'history.json') - get_recipient_id = get_pm_recipient_id - get_files_dir = lambda fn_id: os.path.join(data_dir, 'users', 'files') - - else: - raise Exception('programming error: invalid message_key: ' + message_key) - - history_files = glob.glob(dir_glob) - for fn in history_files: - dir = os.path.dirname(fn) - fn_id = os.path.basename(dir) - files_dir = get_files_dir(fn_id) - - process_message_file( - realm_id=realm_id, - slim_mode=slim_mode, - fn=fn, - fn_id=fn_id, - files_dir=files_dir, - get_recipient_id=get_recipient_id, - message_key=message_key, - subscriber_map=subscriber_map, - data_dir=data_dir, - output_dir=output_dir, - is_pm_data=is_pm_data, - masking_content=masking_content, - user_id_mapper=user_id_mapper, - user_handler=user_handler, - attachment_handler=attachment_handler, - ) - -def get_hipchat_sender_id(realm_id: int, - slim_mode: bool, - message_dict: Dict[str, Any], - user_id_mapper: IdMapper, - user_handler: UserHandler) -> Optional[int]: - ''' - The HipChat export is inconsistent in 
how it renders - senders, and sometimes we don't even get an id. - ''' - if isinstance(message_dict['sender'], str): - if slim_mode: - return None - # Some HipChat instances just give us a person's - # name in the sender field for NotificationMessage. - # We turn them into a mirror user. - mirror_user = user_handler.get_mirror_user( - realm_id=realm_id, - name=message_dict['sender'], - ) - sender_id = mirror_user['id'] - return sender_id - - raw_sender_id = message_dict['sender']['id'] - - if raw_sender_id == 0: - if slim_mode: - return None - mirror_user = user_handler.get_mirror_user( - realm_id=realm_id, - name=message_dict['sender']['name'], - ) - sender_id = mirror_user['id'] - return sender_id - - if not user_id_mapper.has(raw_sender_id): - if slim_mode: - return None - mirror_user = user_handler.get_mirror_user( - realm_id=realm_id, - name=message_dict['sender']['id'], - ) - sender_id = mirror_user['id'] - return sender_id - - # HAPPY PATH: HipChat just gave us an ordinary - # sender_id. 
- sender_id = user_id_mapper.get(raw_sender_id) - return sender_id - -def process_message_file(realm_id: int, - slim_mode: bool, - fn: str, - fn_id: str, - files_dir: str, - get_recipient_id: Callable[[ZerverFieldsT], int], - message_key: str, - subscriber_map: Dict[int, Set[int]], - data_dir: str, - output_dir: str, - is_pm_data: bool, - masking_content: bool, - user_id_mapper: IdMapper, - user_handler: UserHandler, - attachment_handler: AttachmentHandler) -> None: - - def get_raw_messages(fn: str) -> List[ZerverFieldsT]: - with open(fn, "rb") as f: - data = orjson.loads(f.read()) - - flat_data = [ - d[message_key] - for d in data - if message_key in d - ] - - def get_raw_message(d: Dict[str, Any]) -> Optional[ZerverFieldsT]: - sender_id = get_hipchat_sender_id( - realm_id=realm_id, - slim_mode=slim_mode, - message_dict=d, - user_id_mapper=user_id_mapper, - user_handler=user_handler, - ) - - if sender_id is None: - return None - - if is_pm_data: - # We need to compare with str() on both sides here. - # In Stride, user IDs are strings, but in HipChat, - # they are integers, and fn_id is always a string. 
- if str(sender_id) != str(fn_id): - # PMs are in multiple places in the HipChat export, - # and we only use the copy from the sender - return None - - content = d['message'] - - if masking_content: - content = re.sub('[a-z]', 'x', content) - content = re.sub('[A-Z]', 'X', content) - - return dict( - fn_id=fn_id, - sender_id=sender_id, - receiver_id=d.get('receiver', {}).get('id'), - content=content, - mention_user_ids=d.get('mentions', []), - date_sent=str_date_to_float(d['timestamp']), - attachment=d.get('attachment'), - files_dir=files_dir, - ) - - raw_messages = [] - - for d in flat_data: - raw_message = get_raw_message(d) - if raw_message is not None: - raw_messages.append(raw_message) - - return raw_messages - - raw_messages = get_raw_messages(fn) - - def process_batch(lst: List[Any]) -> None: - process_raw_message_batch( - realm_id=realm_id, - raw_messages=lst, - subscriber_map=subscriber_map, - user_id_mapper=user_id_mapper, - user_handler=user_handler, - attachment_handler=attachment_handler, - get_recipient_id=get_recipient_id, - is_pm_data=is_pm_data, - output_dir=output_dir, - ) - - chunk_size = 1000 - - process_list_in_batches( - lst=raw_messages, - chunk_size=chunk_size, - process_batch=process_batch, - ) - -def process_raw_message_batch(realm_id: int, - raw_messages: List[Dict[str, Any]], - subscriber_map: Dict[int, Set[int]], - user_id_mapper: IdMapper, - user_handler: UserHandler, - attachment_handler: AttachmentHandler, - get_recipient_id: Callable[[ZerverFieldsT], int], - is_pm_data: bool, - output_dir: str) -> None: - - def fix_mentions(content: str, - mention_user_ids: Set[int]) -> str: - for user_id in mention_user_ids: - user = user_handler.get_user(user_id=user_id) - hipchat_mention = '@{short_name}'.format(**user) - zulip_mention = '@**{full_name}**'.format(**user) - content = content.replace(hipchat_mention, zulip_mention) - - content = content.replace('@here', '@**all**') - return content - - mention_map: Dict[int, Set[int]] = {} - - 
zerver_message = [] - - import html2text - h = html2text.HTML2Text() - - for raw_message in raw_messages: - # One side effect here: - - message_id = NEXT_ID('message') - mention_user_ids = { - user_id_mapper.get(id) - for id in set(raw_message['mention_user_ids']) - if user_id_mapper.has(id) - } - mention_map[message_id] = mention_user_ids - - content = fix_mentions( - content=raw_message['content'], - mention_user_ids=mention_user_ids, - ) - content = h.handle(content) - - if len(content) > 10000: - logging.info('skipping too-long message of length %s', len(content)) - continue - - date_sent = raw_message['date_sent'] - - try: - recipient_id = get_recipient_id(raw_message) - except KeyError: - logging.debug("Could not find recipient_id for a message, skipping.") - continue - - rendered_content = None - - if is_pm_data: - topic_name = '' - else: - topic_name = 'imported from HipChat' - user_id = raw_message['sender_id'] - - # Another side effect: - extra_content = attachment_handler.handle_message_data( - realm_id=realm_id, - message_id=message_id, - sender_id=user_id, - attachment=raw_message['attachment'], - files_dir=raw_message['files_dir'], - ) - - if extra_content: - has_attachment = True - content += '\n' + extra_content - else: - has_attachment = False - - message = build_message( - content=content, - message_id=message_id, - date_sent=date_sent, - recipient_id=recipient_id, - rendered_content=rendered_content, - topic_name=topic_name, - user_id=user_id, - has_attachment=has_attachment, - ) - zerver_message.append(message) - - zerver_usermessage = make_user_messages( - zerver_message=zerver_message, - subscriber_map=subscriber_map, - is_pm_data=is_pm_data, - mention_map=mention_map, - ) - - message_json = dict( - zerver_message=zerver_message, - zerver_usermessage=zerver_usermessage, - ) - - dump_file_id = NEXT_ID('dump_file_id') - message_file = f"/messages-{dump_file_id:06}.json" - create_converted_data_files(message_json, output_dir, message_file) - -def 
do_convert_data(input_tar_file: str, - output_dir: str, - masking_content: bool, - api_token: Optional[str]=None, - slim_mode: bool=False) -> None: - input_data_dir = untar_input_file(input_tar_file) - - attachment_handler = AttachmentHandler() - user_handler = UserHandler() - subscriber_handler = SubscriberHandler() - user_id_mapper = IdMapper() - stream_id_mapper = IdMapper() - - realm_id = 0 - realm = make_realm(realm_id=realm_id) - - # users.json -> UserProfile - raw_user_data = read_user_data(data_dir=input_data_dir) - convert_user_data( - user_handler=user_handler, - slim_mode=slim_mode, - user_id_mapper=user_id_mapper, - raw_data=raw_user_data, - realm_id=realm_id, - ) - normal_users = user_handler.get_normal_users() - # Don't write zerver_userprofile here, because we - # may add more users later. - - # streams.json -> Stream - raw_stream_data = read_room_data(data_dir=input_data_dir) - zerver_stream = convert_room_data( - raw_data=raw_stream_data, - subscriber_handler=subscriber_handler, - stream_id_mapper=stream_id_mapper, - user_id_mapper=user_id_mapper, - realm_id=realm_id, - api_token=api_token, - ) - realm['zerver_stream'] = zerver_stream - - zerver_recipient = build_recipients( - zerver_userprofile=normal_users, - zerver_stream=zerver_stream, - ) - realm['zerver_recipient'] = zerver_recipient - - if api_token is None: - if slim_mode: - public_stream_subscriptions: List[ZerverFieldsT] = [] - else: - public_stream_subscriptions = build_public_stream_subscriptions( - zerver_userprofile=normal_users, - zerver_recipient=zerver_recipient, - zerver_stream=zerver_stream, - ) - - private_stream_subscriptions = build_stream_subscriptions( - get_users=subscriber_handler.get_users, - zerver_recipient=zerver_recipient, - zerver_stream=[stream_dict for stream_dict in zerver_stream - if stream_dict['invite_only']], - ) - stream_subscriptions = public_stream_subscriptions + private_stream_subscriptions - else: - stream_subscriptions = build_stream_subscriptions( - 
get_users=subscriber_handler.get_users, - zerver_recipient=zerver_recipient, - zerver_stream=zerver_stream, - ) - - personal_subscriptions = build_personal_subscriptions( - zerver_recipient=zerver_recipient, - ) - zerver_subscription = personal_subscriptions + stream_subscriptions - - realm['zerver_subscription'] = zerver_subscription - - zerver_realmemoji = write_emoticon_data( - realm_id=realm_id, - data_dir=input_data_dir, - output_dir=output_dir, - ) - realm['zerver_realmemoji'] = zerver_realmemoji - - subscriber_map = make_subscriber_map( - zerver_subscription=zerver_subscription, - ) - - logging.info('Start importing message data') - for message_key in ['UserMessage', - 'NotificationMessage', - 'PrivateUserMessage']: - write_message_data( - realm_id=realm_id, - slim_mode=slim_mode, - message_key=message_key, - zerver_recipient=zerver_recipient, - subscriber_map=subscriber_map, - data_dir=input_data_dir, - output_dir=output_dir, - masking_content=masking_content, - stream_id_mapper=stream_id_mapper, - user_id_mapper=user_id_mapper, - user_handler=user_handler, - attachment_handler=attachment_handler, - ) - - # Order is important here...don't write users until - # we process everything else, since we may introduce - # mirror users when processing messages. 
- realm['zerver_userprofile'] = user_handler.get_all_users() - realm['sort_by_date'] = True - - create_converted_data_files(realm, output_dir, '/realm.json') - - logging.info('Start importing avatar data') - write_avatar_data( - raw_user_data=raw_user_data, - output_dir=output_dir, - user_id_mapper=user_id_mapper, - realm_id=realm_id, - ) - - attachment_handler.write_info( - output_dir=output_dir, - realm_id=realm_id, - ) - - logging.info('Start making tarball') - subprocess.check_call(["tar", "-czf", output_dir + '.tar.gz', output_dir, '-P']) - logging.info('Done making tarball') diff --git a/zerver/data_import/hipchat_attachment.py b/zerver/data_import/hipchat_attachment.py deleted file mode 100644 index 5193f4afe0..0000000000 --- a/zerver/data_import/hipchat_attachment.py +++ /dev/null @@ -1,136 +0,0 @@ -import logging -import os -import shutil -from typing import Any, Dict, List, Optional - -from zerver.data_import.import_util import build_attachment, create_converted_data_files - - -class AttachmentHandler: - def __init__(self) -> None: - self.info_dict: Dict[str, Dict[str, Any]] = {} - - def handle_message_data(self, - realm_id: int, - message_id: int, - sender_id: int, - attachment: Dict[str, Any], - files_dir: str) -> Optional[str]: - if not attachment: - return None - - name = attachment['name'] - - if 'path' not in attachment: - logging.info('Skipping HipChat attachment with missing path data: ' + name) - return None - - size = attachment['size'] - path = attachment['path'] - - local_fn = os.path.join(files_dir, path) - - if not os.path.exists(local_fn): - # HipChat has an option to not include these in its - # exports, since file uploads can be very large. 
- logging.info('Skipping attachment with no file data: ' + local_fn) - return None - - target_path = os.path.join( - str(realm_id), - 'HipChatImportAttachment', - path, - ) - - if target_path in self.info_dict: - logging.info("file used multiple times: " + path) - info = self.info_dict[target_path] - info['message_ids'].add(message_id) - return info['content'] - - # HipChat provides size info, but it's not - # completely trustworthy, so we we just - # ask the OS for file details. - size = os.path.getsize(local_fn) - mtime = os.path.getmtime(local_fn) - - content = f'[{name}](/user_uploads/{target_path})' - - info = dict( - message_ids={message_id}, - sender_id=sender_id, - local_fn=local_fn, - target_path=target_path, - name=name, - size=size, - mtime=mtime, - content=content, - ) - self.info_dict[target_path] = info - - return content - - def write_info(self, output_dir: str, realm_id: int) -> None: - attachments: List[Dict[str, Any]] = [] - uploads_records: List[Dict[str, Any]] = [] - - def add_attachment(info: Dict[str, Any]) -> None: - build_attachment( - realm_id=realm_id, - message_ids=info['message_ids'], - user_id=info['sender_id'], - fileinfo=dict( - created=info['mtime'], # minor lie - size=info['size'], - name=info['name'], - ), - s3_path=info['target_path'], - zerver_attachment=attachments, - ) - - def add_upload(info: Dict[str, Any]) -> None: - target_path = info['target_path'] - upload_rec = dict( - size=info['size'], - user_profile_id=info['sender_id'], - realm_id=realm_id, - s3_path=target_path, - path=target_path, - content_type=None, - ) - uploads_records.append(upload_rec) - - def make_full_target_path(info: Dict[str, Any]) -> str: - target_path = info['target_path'] - full_target_path = os.path.join( - output_dir, - 'uploads', - target_path, - ) - full_target_path = os.path.abspath(full_target_path) - os.makedirs(os.path.dirname(full_target_path), exist_ok=True) - return full_target_path - - def copy_file(info: Dict[str, Any]) -> None: - 
source_path = info['local_fn'] - target_path = make_full_target_path(info) - shutil.copyfile(source_path, target_path) - - logging.info('Start processing attachment files') - - for info in self.info_dict.values(): - add_attachment(info) - add_upload(info) - copy_file(info) - - uploads_folder = os.path.join(output_dir, 'uploads') - os.makedirs(os.path.join(uploads_folder, str(realm_id)), exist_ok=True) - - attachment = dict( - zerver_attachment=attachments, - ) - - create_converted_data_files(uploads_records, output_dir, '/uploads/records.json') - create_converted_data_files(attachment, output_dir, '/attachment.json') - - logging.info('Done processing attachment files') diff --git a/zerver/data_import/hipchat_user.py b/zerver/data_import/hipchat_user.py deleted file mode 100644 index b751239f40..0000000000 --- a/zerver/data_import/hipchat_user.py +++ /dev/null @@ -1,84 +0,0 @@ -from typing import Any, Dict, List - -from django.utils.timezone import now as timezone_now - -from zerver.data_import.import_util import build_user_profile -from zerver.models import UserProfile - - -class UserHandler: - ''' - Our UserHandler class is a glorified wrapper - around the data that eventually goes into - zerver_userprofile. - - The class helps us do things like map ids - to names for mentions. - - We also sometimes need to build mirror - users on the fly. 
- ''' - - def __init__(self) -> None: - self.id_to_user_map: Dict[int, Dict[str, Any]] = {} - self.name_to_mirror_user_map: Dict[str, Dict[str, Any]] = {} - self.mirror_user_id = 1 - - def add_user(self, user: Dict[str, Any]) -> None: - user_id = user['id'] - self.id_to_user_map[user_id] = user - - def get_user(self, user_id: int) -> Dict[str, Any]: - user = self.id_to_user_map[user_id] - return user - - def get_mirror_user(self, - realm_id: int, - name: str) -> Dict[str, Any]: - if name in self.name_to_mirror_user_map: - user = self.name_to_mirror_user_map[name] - return user - - user_id = self._new_mirror_user_id() - short_name = name - full_name = name - email = f'mirror-{user_id}@example.com' - delivery_email = email - avatar_source = 'G' - date_joined = int(timezone_now().timestamp()) - timezone = 'UTC' - - user = build_user_profile( - avatar_source=avatar_source, - date_joined=date_joined, - delivery_email=delivery_email, - email=email, - full_name=full_name, - id=user_id, - is_active=False, - role=UserProfile.ROLE_MEMBER, - is_mirror_dummy=True, - realm_id=realm_id, - short_name=short_name, - timezone=timezone, - ) - - self.name_to_mirror_user_map[name] = user - return user - - def _new_mirror_user_id(self) -> int: - next_id = self.mirror_user_id - while next_id in self.id_to_user_map: - next_id += 1 - self.mirror_user_id = next_id + 1 - return next_id - - def get_normal_users(self) -> List[Dict[str, Any]]: - users = list(self.id_to_user_map.values()) - return users - - def get_all_users(self) -> List[Dict[str, Any]]: - normal_users = self.get_normal_users() - mirror_users = list(self.name_to_mirror_user_map.values()) - all_users = normal_users + mirror_users - return all_users diff --git a/zerver/data_import/import_util.py b/zerver/data_import/import_util.py index 8b0a2ef8a0..e844ebe269 100644 --- a/zerver/data_import/import_util.py +++ b/zerver/data_import/import_util.py @@ -170,8 +170,8 @@ def build_public_stream_subscriptions( zerver_recipient: 
List[ZerverFieldsT], zerver_stream: List[ZerverFieldsT]) -> List[ZerverFieldsT]: ''' - This function is only used for HipChat now, but it may apply to - future conversions. We often don't get full subscriber data in + This function was only used for HipChat, but it may apply to + future conversions. We often didn't get full subscriber data in the HipChat export, so this function just autosubscribes all users to every public stream. This returns a list of Subscription dicts. @@ -298,8 +298,8 @@ def build_recipients(zerver_userprofile: Iterable[ZerverFieldsT], zerver_stream: Iterable[ZerverFieldsT], zerver_huddle: Iterable[ZerverFieldsT] = []) -> List[ZerverFieldsT]: ''' - As of this writing, we only use this in the HipChat - conversion. The Slack and Gitter conversions do it more + This function was only used for the HipChat import, but it may be + required for future conversions. The Slack and Gitter conversions do it more tightly integrated with creating other objects. ''' diff --git a/zerver/data_import/sequencer.py b/zerver/data_import/sequencer.py index 46d8192aa3..c82b58ddff 100644 --- a/zerver/data_import/sequencer.py +++ b/zerver/data_import/sequencer.py @@ -8,7 +8,7 @@ sequences work. You need to be a bit careful here, since you're dealing with a big singleton, but for data imports that's usually easy to -manage. See hipchat.py for example usage. +manage. 
''' def _seq() -> Callable[[], int]: diff --git a/zerver/lib/markdown/tabbed_sections.py b/zerver/lib/markdown/tabbed_sections.py index a5952af030..64b1b7c57d 100644 --- a/zerver/lib/markdown/tabbed_sections.py +++ b/zerver/lib/markdown/tabbed_sections.py @@ -52,10 +52,6 @@ TAB_DISPLAY_NAMES = { 'desktop': 'Desktop', 'mobile': 'Mobile', - 'cloud': 'HipChat Cloud', - 'server': 'HipChat Server or Data Center', - 'stride': 'Stride', - 'mm-default': 'Default installation', 'mm-docker': 'Docker', 'mm-gitlab-omnibus': 'GitLab Omnibus', diff --git a/zerver/management/commands/convert_hipchat_data.py b/zerver/management/commands/convert_hipchat_data.py deleted file mode 100644 index ce2154ae9e..0000000000 --- a/zerver/management/commands/convert_hipchat_data.py +++ /dev/null @@ -1,81 +0,0 @@ -import argparse -import os -from typing import Any - -''' -Example usage for testing purposes: - -Move the data: - rm -Rf ~/hipchat-data - mkdir ~/hipchat-data - ./manage.py convert_hipchat_data ~/hipchat-31028-2018-08-08_23-23-22.tar --output ~/hipchat-data - ./manage.py import --destroy-rebuild-database hipchat ~/hipchat-data - - -Test out the realm: - ./tools/run-dev.py - go to browser and use your dev url - -spec: - https://confluence.atlassian.com/hipchatkb/ - exporting-from-hipchat-server-or-data-center-for-data-portability-950821555.html -''' - -from django.core.management.base import BaseCommand, CommandError, CommandParser - -from zerver.data_import.hipchat import do_convert_data - - -class Command(BaseCommand): - help = """Convert the HipChat data into Zulip data format.""" - - def add_arguments(self, parser: CommandParser) -> None: - parser.add_argument('hipchat_tar', nargs='+', - metavar='