hipchat import: Add option to mask content.

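Masking content can be useful for testing
out conversions where you're dealing
with data from customers and want to avoid
inadvertently reading their content (while
still having semi-realistic messages).
When the --mask option is given, each ASCII
lowercase letter in a message is replaced with
"x" and each ASCII uppercase letter with "X";
digits, punctuation, and non-ASCII characters
are left unchanged.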
This commit is contained in:
Steve Howell 2018-10-24 22:57:11 +00:00 committed by showell
parent 6e8ae2e3fd
commit 272b954790
2 changed files with 24 additions and 3 deletions

View File

@ -3,6 +3,7 @@ import dateutil
import glob
import logging
import os
import re
import shutil
import subprocess
import ujson
@ -372,6 +373,7 @@ def write_message_data(realm_id: int,
subscriber_map: Dict[int, Set[int]],
data_dir: str,
output_dir: str,
masking_content: bool,
user_handler: UserHandler,
attachment_handler: AttachmentHandler) -> None:
@ -427,6 +429,7 @@ def write_message_data(realm_id: int,
subscriber_map=subscriber_map,
data_dir=data_dir,
output_dir=output_dir,
masking_content=masking_content,
user_handler=user_handler,
attachment_handler=attachment_handler,
)
@ -472,6 +475,7 @@ def process_message_file(realm_id: int,
subscriber_map: Dict[int, Set[int]],
data_dir: str,
output_dir: str,
masking_content: bool,
user_handler: UserHandler,
attachment_handler: AttachmentHandler) -> None:
@ -498,11 +502,17 @@ def process_message_file(realm_id: int,
# and we only use the copy from the sender
return None
content = d['message']
if masking_content:
content = re.sub('[a-z]', 'x', content)
content = re.sub('[A-Z]', 'X', content)
return dict(
fn_id=fn_id,
sender_id=sender_id,
receiver_id=d.get('receiver', {}).get('id'),
content=d['message'],
content=content,
mention_user_ids=d.get('mentions', []),
pub_date=str_date_to_float(d['timestamp']),
attachment=d.get('attachment'),
@ -649,7 +659,9 @@ def make_user_messages(zerver_message: List[ZerverFieldsT],
return zerver_usermessage
def do_convert_data(input_tar_file: str, output_dir: str) -> None:
def do_convert_data(input_tar_file: str,
output_dir: str,
masking_content: bool) -> None:
input_data_dir = untar_input_file(input_tar_file)
attachment_handler = AttachmentHandler()
@ -730,6 +742,7 @@ def do_convert_data(input_tar_file: str, output_dir: str) -> None:
subscriber_map=subscriber_map,
data_dir=input_data_dir,
output_dir=output_dir,
masking_content=masking_content,
user_handler=user_handler,
attachment_handler=attachment_handler,
)

View File

@ -40,6 +40,10 @@ class Command(BaseCommand):
action="store",
help='Directory to write exported data to.')
parser.add_argument('--mask', dest='masking_content',
action="store_true",
help='Mask the content for privacy during QA.')
parser.formatter_class = argparse.RawTextHelpFormatter
def handle(self, *args: Any, **options: Any) -> None:
@ -65,4 +69,8 @@ class Command(BaseCommand):
exit(1)
print("Converting Data ...")
do_convert_data(path, output_dir)
do_convert_data(
input_tar_file=path,
output_dir=output_dir,
masking_content=options.get('masking_content', False),
)