From db934be0646e3002287c72ba2154a904a4dc42b5 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 28 Sep 2021 23:27:54 +0000 Subject: [PATCH] CVE-2021-41115: Use re2 for user-supplied linkifier patterns. Zulip attempts to validate that the regular expressions that admins enter for linkifiers are well-formatted, and only contain a specific subset of regex grammar. The process of checking these properties (via a regex!) can cause denial-of-service via backtracking. Furthermore, this validation itself does not prevent the creation of linkifiers which themselves cause denial-of-service when they are executed. As the validator accepts literally anything inside of a `(?P...)` block, any quadratic backtracking expression can be hidden therein. Switch user-provided linkifier patterns to be matched in the Markdown processor by the `re2` library, which is guaranteed constant-time. This somewhat limits the possible features of the regular expression (notably, look-head and -behind, and back-references); however, these features had never been advertised as working in the context of linkifiers. A migration removes any existing linkifiers which would not function under re2, after printing them for posterity during the upgrade; they are unlikely to be common, and are impossible to fix automatically. The denial-of-service in the linkifier validator was discovered by @erik-krogh and @yoff, as GHSL-2021-118. --- .../puppeteer_tests/realm-linkifier.ts | 10 ++--- zerver/lib/markdown/__init__.py | 45 ++++++++++++++----- zerver/migrations/0359_re2_linkifiers.py | 39 ++++++++++++++++ zerver/migrations/0360_merge_0358_0359.py | 15 +++++++ zerver/models.py | 27 +++++------ zerver/tests/test_markdown.py | 9 ++-- zerver/tests/test_realm_linkifiers.py | 18 +++----- 7 files changed, 118 insertions(+), 45 deletions(-) create mode 100644 zerver/migrations/0359_re2_linkifiers.py create mode 100644 zerver/migrations/0360_merge_0358_0359.py diff --git a/frontend_tests/puppeteer_tests/realm-linkifier.ts b/frontend_tests/puppeteer_tests/realm-linkifier.ts index b58c226150..743d6537b4 100644 --- a/frontend_tests/puppeteer_tests/realm-linkifier.ts +++ b/frontend_tests/puppeteer_tests/realm-linkifier.ts @@ -42,7 +42,7 @@ async function test_delete_linkifier(page: Page): Promise { async function test_add_invalid_linkifier_pattern(page: Page): Promise { await page.waitForSelector(".admin-linkifier-form", {visible: true}); await common.fill_form(page, "form.admin-linkifier-form", { - pattern: "a$", + pattern: "(foo", url_format_string: "https://trac.example.com/ticket/%(id)s", }); await page.click("form.admin-linkifier-form button.button"); @@ -50,7 +50,7 @@ async function test_add_invalid_linkifier_pattern(page: Page): Promise { await page.waitForSelector("div#admin-linkifier-status", {visible: true}); assert.strictEqual( await common.get_text_from_selector(page, "div#admin-linkifier-status"), - "Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].", + "Failed: Bad regular expression: missing ): (foo", ); } @@ -83,8 +83,8 @@ async function test_edit_invalid_linkifier(page: Page): Promise { await page.click(".linkifier_row .edit"); await page.waitForFunction(() => document.activeElement?.id === "dialog_widget_modal"); await common.fill_form(page, "form.linkifier-edit-form", { - pattern: "####", - url_format_string: "####", + pattern: "#(?Pd????)", + url_format_string: "????", }); await page.click(".dialog_submit_button"); @@ -96,7 +96,7 @@ async function test_edit_invalid_linkifier(page: Page): Promise { ); assert.strictEqual( edit_linkifier_pattern_status, - "Failed: Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-].", + "Failed: Bad regular expression: bad repetition operator: ????", ); const edit_linkifier_format_status_selector = "div#edit-linkifier-format-status"; diff --git a/zerver/lib/markdown/__init__.py b/zerver/lib/markdown/__init__.py index 5bbd11e238..1a9e977f33 100644 --- a/zerver/lib/markdown/__init__.py +++ b/zerver/lib/markdown/__init__.py @@ -38,6 +38,7 @@ import markdown.inlinepatterns import markdown.postprocessors import markdown.treeprocessors import markdown.util +import re2 import requests from django.conf import settings from markdown.blockparser import BlockParser @@ -1769,7 +1770,9 @@ class MarkdownListPreprocessor(markdown.preprocessors.Preprocessor): # Name for the outer capture group we use to separate whitespace and # other delimiters from the actual content. This value won't be an # option in user-entered capture groups. +BEFORE_CAPTURE_GROUP = "linkifier_before_match" OUTER_CAPTURE_GROUP = "linkifier_actual_match" +AFTER_CAPTURE_GROUP = "linkifier_after_match" def prepare_linkifier_pattern(source: str) -> str: @@ -1777,31 +1780,45 @@ def prepare_linkifier_pattern(source: str) -> str: whitespace, or opening delimiters, won't match if there are word characters directly after, and saves what was matched as OUTER_CAPTURE_GROUP.""" - return fr"""(?{source})(?!\w)""" + return fr"""(?P<{BEFORE_CAPTURE_GROUP}>^|\s|['"\(,:<])(?P<{OUTER_CAPTURE_GROUP}>{source})(?P<{AFTER_CAPTURE_GROUP}>$|[^\pL\pN])""" # Given a regular expression pattern, linkifies groups that match it # using the provided format string to construct the URL. -class LinkifierPattern(markdown.inlinepatterns.Pattern): +class LinkifierPattern(CompiledInlineProcessor): """Applied a given linkifier to the input""" def __init__( self, source_pattern: str, format_string: str, - md: Optional[markdown.Markdown] = None, + md: markdown.Markdown, ) -> None: - self.pattern = prepare_linkifier_pattern(source_pattern) - self.format_string = format_string - super().__init__(self.pattern, md) + # Do not write errors to stderr (this still raises exceptions) + options = re2.Options() + options.log_errors = False - def handleMatch(self, m: Match[str]) -> Union[Element, str]: + compiled_re2 = re2.compile(prepare_linkifier_pattern(source_pattern), options=options) + self.format_string = format_string + super().__init__(compiled_re2, md) + + def handleMatch( # type: ignore[override] # supertype incompatible with supersupertype + self, m: Match[str], data: str + ) -> Union[Tuple[Element, int, int], Tuple[None, None, None]]: db_data = self.md.zulip_db_data - return url_to_a( + url = url_to_a( db_data, self.format_string % m.groupdict(), markdown.util.AtomicString(m.group(OUTER_CAPTURE_GROUP)), ) + if isinstance(url, str): + return None, None, None + + return ( + url, + m.start(2), + m.end(2), + ) class UserMentionPattern(CompiledInlineProcessor): @@ -2314,11 +2331,19 @@ def topic_links(linkifiers_key: int, topic_name: str) -> List[Dict[str, str]]: matches: List[Dict[str, Union[str, int]]] = [] linkifiers = linkifiers_for_realm(linkifiers_key) + options = re2.Options() + options.log_errors = False for linkifier in linkifiers: raw_pattern = linkifier["pattern"] url_format_string = linkifier["url_format"] - pattern = prepare_linkifier_pattern(raw_pattern) - for m in re.finditer(pattern, topic_name): + try: + pattern = re2.compile(prepare_linkifier_pattern(raw_pattern), options=options) + except re2.error: + # An invalid regex shouldn't be possible here, and logging + # here on an invalid regex would spam the logs with every + # message sent; simply move on. + continue + for m in pattern.finditer(topic_name): match_details = m.groupdict() match_text = match_details[OUTER_CAPTURE_GROUP] # We format the linkifier's url string using the matched text. diff --git a/zerver/migrations/0359_re2_linkifiers.py b/zerver/migrations/0359_re2_linkifiers.py new file mode 100644 index 0000000000..7c7449dae5 --- /dev/null +++ b/zerver/migrations/0359_re2_linkifiers.py @@ -0,0 +1,39 @@ +import re2 +from django.db import migrations +from django.db.backends.postgresql.schema import DatabaseSchemaEditor +from django.db.migrations.state import StateApps + + +def delete_re2_invalid(apps: StateApps, schema_editor: DatabaseSchemaEditor) -> None: + options = re2.Options() + options.log_errors = False + + RealmFilter = apps.get_model("zerver", "RealmFilter") + found_errors = False + for linkifier in RealmFilter.objects.all(): + try: + re2.compile(linkifier.pattern, options=options) + except re2.error: + if not found_errors: + print() + found_errors = True + print( + f"Deleting linkifier {linkifier.id} in realm {linkifier.realm.string_id} which is not compatible with new re2 engine:" + ) + print(f" {linkifier.pattern} -> {linkifier.url_format_string}") + linkifier.delete() + + +class Migration(migrations.Migration): + + dependencies = [ + ("zerver", "0325_alter_realmplayground_unique_together"), + ] + + operations = [ + migrations.RunPython( + delete_re2_invalid, + reverse_code=migrations.RunPython.noop, + elidable=True, + ) + ] diff --git a/zerver/migrations/0360_merge_0358_0359.py b/zerver/migrations/0360_merge_0358_0359.py new file mode 100644 index 0000000000..8a7eaf63f0 --- /dev/null +++ b/zerver/migrations/0360_merge_0358_0359.py @@ -0,0 +1,15 @@ +# Generated by Django 3.2.7 on 2021-10-04 17:49 + +from typing import Any, List + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("zerver", "0358_split_create_stream_policy"), + ("zerver", "0359_re2_linkifiers"), + ] + + operations: List[Any] = [] diff --git a/zerver/models.py b/zerver/models.py index f45dceb5b4..1158f28459 100644 --- a/zerver/models.py +++ b/zerver/models.py @@ -20,6 +20,7 @@ from typing import ( import django.contrib.auth import orjson +import re2 from bitfield import BitField from bitfield.types import BitHandler from django.conf import settings @@ -1084,21 +1085,21 @@ post_delete.connect(flush_realm_emoji, sender=RealmEmoji) def filter_pattern_validator(value: str) -> Pattern[str]: - regex = re.compile(r"^(?:(?:[\w\-#_= /:]*|[+]|[!])(\(\?P<\w+>.+\)))+$") - error_msg = _("Invalid linkifier pattern. Valid characters are {}.").format( - "[ a-zA-Z_#=/:+!-]", - ) - - if not regex.match(str(value)): - raise ValidationError(error_msg) - try: - pattern = re.compile(value) - except re.error: - # Regex is invalid - raise ValidationError(error_msg) + # Do not write errors to stderr (this still raises exceptions) + options = re2.Options() + options.log_errors = False - return pattern + regex = re2.compile(value, options=options) + except re2.error as e: + if len(e.args) >= 1: + if isinstance(e.args[0], str): # nocoverage + raise ValidationError(_("Bad regular expression: {}").format(e.args[0])) + if isinstance(e.args[0], bytes): + raise ValidationError(_("Bad regular expression: {}").format(e.args[0].decode())) + raise ValidationError(_("Unknown regular expression error")) # nocoverage + + return regex def filter_format_validator(value: str) -> None: diff --git a/zerver/tests/test_markdown.py b/zerver/tests/test_markdown.py index 6c83c7fa17..702aab6d76 100644 --- a/zerver/tests/test_markdown.py +++ b/zerver/tests/test_markdown.py @@ -1406,26 +1406,25 @@ class MarkdownTest(ZulipTestCase): url_format_string = r"https://trac.example.com/ticket/%(id)s" linkifier_1 = RealmFilter( realm=realm, - pattern=r"(?PABC\-[0-9]+)(?![A-Z0-9-])", + pattern=r"(?PABC\-[0-9]+)", url_format_string=url_format_string, ) linkifier_1.save() self.assertEqual( linkifier_1.__str__(), - r"ABC\-[0-9]+)(?![A-Z0-9-])" - " https://trac.example.com/ticket/%(id)s>", + r"ABC\-[0-9]+) https://trac.example.com/ticket/%(id)s>", ) url_format_string = r"https://other-trac.example.com/ticket/%(id)s" linkifier_2 = RealmFilter( realm=realm, - pattern=r"(?P[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])", + pattern=r"(?P[A-Z][A-Z0-9]*\-[0-9]+)", url_format_string=url_format_string, ) linkifier_2.save() self.assertEqual( linkifier_2.__str__(), - r"[A-Z][A-Z0-9]*\-[0-9]+)(?![A-Z0-9-])" + r"[A-Z][A-Z0-9]*\-[0-9]+)" " https://other-trac.example.com/ticket/%(id)s>", ) diff --git a/zerver/tests/test_realm_linkifiers.py b/zerver/tests/test_realm_linkifiers.py index 49314a6b7a..10d9383b67 100644 --- a/zerver/tests/test_realm_linkifiers.py +++ b/zerver/tests/test_realm_linkifiers.py @@ -27,17 +27,13 @@ class RealmFilterTest(ZulipTestCase): result = self.client_post("/json/realm/filters", info=data) self.assert_json_error(result, "This field cannot be blank.") - data["pattern"] = "$a" + data["pattern"] = "(foo" result = self.client_post("/json/realm/filters", info=data) - self.assert_json_error( - result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]." - ) + self.assert_json_error(result, "Bad regular expression: missing ): (foo") - data["pattern"] = r"ZUL-(?P\d++)" + data["pattern"] = r"ZUL-(?P\d????)" result = self.client_post("/json/realm/filters", info=data) - self.assert_json_error( - result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]." - ) + self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????") data["pattern"] = r"ZUL-(?P\d+)" data["url_format_string"] = "$fgfg" @@ -189,13 +185,11 @@ class RealmFilterTest(ZulipTestCase): ) data = { - "pattern": r"ZUL-(?P\d++)", + "pattern": r"ZUL-(?P\d????)", "url_format_string": "https://realm.com/my_realm_filter/%(id)s", } result = self.client_patch(f"/json/realm/filters/{linkifier_id}", info=data) - self.assert_json_error( - result, "Invalid linkifier pattern. Valid characters are [ a-zA-Z_#=/:+!-]." - ) + self.assert_json_error(result, "Bad regular expression: bad repetition operator: ????") data["pattern"] = r"ZUL-(?P\d+)" data["url_format_string"] = "$fgfg"