linkifiers: Support URL percent-encoded bytes.

Supporting URL percent-encoded bytes is already possible by writing
`%%20`, but this is not very understandable to end users, even those
who understand percent encoding.

Allow `%20` in linkifier URL format strings, and transform it into
`%%20` in the format string just before it is applied in Markdown
translation.  Care must be taken here, such that already-escaped `%`s
are not escaped an extra time.

We do this before rendering, and not before storage, as a
simplification: the JS-side linkifier at present only understands
`%(foo)s` and thus needs no changes, and no un-escaping pass is needed
before showing the format string in the admin UI.
This commit is contained in:
Alex Vandiver 2021-10-20 02:57:53 +00:00 committed by Tim Abbott
parent d6768814a1
commit 9381a3bd45
4 changed files with 45 additions and 8 deletions

View File

@ -1795,7 +1795,22 @@ class LinkifierPattern(CompiledInlineProcessor):
options.log_errors = False
compiled_re2 = re2.compile(prepare_linkifier_pattern(source_pattern), options=options)
self.format_string = format_string
# Find percent-encoded bytes and escape them from the python
# interpolation. That is:
# %(foo)s -> %(foo)s
# %% -> %%
# %ab -> %%ab
# %%ab -> %%ab
# %%%ab -> %%%%ab
#
# We do this here, rather than before storing, to make edits
# to the underlying linkifier more straightforward, and
# because JS does not have a real formatter.
self.format_string = re.sub(
r"(?<!%)(%%)*%([a-fA-F0-9][a-fA-F0-9])", r"\1%%\2", format_string
)
super().__init__(compiled_re2, md)
def handleMatch( # type: ignore[override] # supertype incompatible with supersupertype

View File

@ -1138,6 +1138,9 @@ def filter_format_validator(value: str) -> None:
\( [a-zA-Z0-9_-]+ \) s # Interpolation group
| # OR
% # %%, which is an escaped %
| # OR
[0-9a-fA-F][0-9a-fA-F] # URL percent-encoded bytes, which we
# special-case in markdown translation
)
)+ # Those happen one or more times
$

View File

@ -1401,6 +1401,20 @@ class MarkdownTest(ZulipTestCase):
],
)
# Test URI escaping
RealmFilter(
realm=realm,
pattern=r"url-(?P<id>[0-9]+)",
url_format_string="https://example.com/%%%ba/%(id)s",
).save()
msg = Message(sender=self.example_user("hamlet"))
content = "url-123 is well-escaped"
converted = markdown_convert(content, message_realm=realm, message=msg)
self.assertEqual(
converted.rendered_content,
'<p><a href="https://example.com/%%ba/123">url-123</a> is well-escaped</p>',
)
def test_multiple_matching_realm_patterns(self) -> None:
realm = get_realm("zulip")
url_format_string = r"https://trac.example.com/ticket/%(id)s"

View File

@ -126,6 +126,11 @@ class RealmFilterTest(ZulipTestCase):
result = self.client_post("/json/realm/filters", info=data)
self.assert_json_success(result)
data["pattern"] = r"ZUL-URI-(?P<id>\d+)"
data["url_format_string"] = "https://example.com/%ba/%(id)s"
result = self.client_post("/json/realm/filters", info=data)
self.assert_json_success(result)
data["pattern"] = r"(?P<org>[a-zA-Z0-9_-]+)/(?P<repo>[a-zA-Z0-9_-]+)#(?P<id>[0-9]+)"
data["url_format_string"] = "https://github.com/%(org)s/%(repo)s/issue/%(id)s"
result = self.client_post("/json/realm/filters", info=data)
@ -215,6 +220,13 @@ class RealmFilterTest(ZulipTestCase):
"https://example.com/%(foo)s%(bars)s",
"https://example.com/%(foo)s/and/%(bar)s",
"https://example.com/?foo=%(foo)s",
"https://example.com/%ab",
"https://example.com/%ba",
"https://example.com/%21",
"https://example.com/words%20with%20spaces",
"https://example.com/back%20to%20%(back)s",
"https://example.com/encoded%2fwith%2fletters",
"https://example.com/encoded%2Fwith%2Fupper%2Fcase%2Fletters",
"https://example.com/%%",
"https://example.com/%%(",
"https://example.com/%%()",
@ -228,13 +240,6 @@ class RealmFilterTest(ZulipTestCase):
invalid_urls = [
"file:///etc/passwd",
"data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==",
"https://example.com/%ab",
"https://example.com/%ba",
"https://example.com/%21",
"https://example.com/words%20with%20spaces",
"https://example.com/back%20to%20%(back)s",
"https://example.com/encoded%2fwith%2fletters",
"https://example.com/encoded%2Fwith%2Fupper%2Fcase%2Fletters",
"https://example.com/%(foo)",
"https://example.com/%()s",
"https://example.com/%4!",