mirror of
https://github.com/zulip/zulip.git
synced 2026-06-27 21:01:32 +08:00
oembed: Remove unsound HTML filtering.
The frontend now takes care of confining the HTML.

Signed-off-by: Anders Kaseorg <anders@zulipchat.com>
This commit is contained in:
parent
8459185970
commit
faa3ea0b8e
@ -1,4 +1,3 @@
|
||||
from bs4 import BeautifulSoup, SoupStrainer
|
||||
from typing import Optional, Dict, Any
|
||||
from pyoembed import oEmbed, PyOembedException
|
||||
|
||||
@ -20,25 +19,16 @@ def get_oembed_data(url: str,
|
||||
data['oembed'] = True
|
||||
|
||||
elif oembed_resource_type == 'video' and html and thumbnail:
|
||||
data['html'] = get_safe_html(html)
|
||||
data['html'] = strip_cdata(html)
|
||||
data['image'] = thumbnail
|
||||
# Add a key to identify oembed metadata as opposed to other metadata
|
||||
data['oembed'] = True
|
||||
|
||||
return data
|
||||
|
||||
def get_safe_html(html: str) -> str:
|
||||
"""Return a safe version of the oEmbed html.
|
||||
|
||||
Verify that the HTML:
|
||||
1. has a single iframe
|
||||
2. the src uses a schema relative URL or explicitly specifies http(s)
|
||||
|
||||
"""
|
||||
def strip_cdata(html: str) -> str:
|
||||
# Work around a bug in SoundCloud's XML generation:
|
||||
# <html><![CDATA[<iframe ...></iframe>]]></html>
|
||||
if html.startswith('<![CDATA[') and html.endswith(']]>'):
|
||||
html = html[9:-3]
|
||||
soup = BeautifulSoup(html, 'lxml', parse_only=SoupStrainer('iframe'))
|
||||
iframe = soup.find('iframe')
|
||||
if iframe is not None and iframe.get('src').startswith(('http://', 'https://', '//')):
|
||||
return str(soup)
|
||||
return ''
|
||||
return html
|
||||
|
||||
@ -14,7 +14,7 @@ from zerver.lib.test_helpers import MockPythonResponse
|
||||
from zerver.worker.queue_processors import FetchLinksEmbedData
|
||||
from zerver.lib.url_preview.preview import (
|
||||
get_link_embed_data, link_embed_data_from_cache)
|
||||
from zerver.lib.url_preview.oembed import get_oembed_data, get_safe_html
|
||||
from zerver.lib.url_preview.oembed import get_oembed_data, strip_cdata
|
||||
from zerver.lib.url_preview.parsers import (
|
||||
OpenGraphParser, GenericParser)
|
||||
from zerver.lib.cache import cache_set, NotFoundInCache, preview_url_cache_key
|
||||
@ -116,22 +116,16 @@ class OembedTestCase(ZulipTestCase):
|
||||
data = get_oembed_data(url)
|
||||
self.assertIsNone(data)
|
||||
|
||||
def test_safe_oembed_html(self) -> None:
|
||||
def test_oembed_html(self) -> None:
|
||||
html = '<iframe src="//www.instagram.com/embed.js"></iframe>'
|
||||
safe_html = get_safe_html(html)
|
||||
self.assertEqual(html, safe_html)
|
||||
|
||||
def test_unsafe_oembed_html(self) -> None:
|
||||
html = ('<blockquote class="instagram-media" data-instgrm-captioned>test</blockquote>\n'
|
||||
'<script async src="//www.instagram.com/embed.js"></script>')
|
||||
safe_html = get_safe_html(html)
|
||||
self.assertEqual('', safe_html)
|
||||
stripped_html = strip_cdata(html)
|
||||
self.assertEqual(html, stripped_html)
|
||||
|
||||
def test_autodiscovered_oembed_xml_format_html(self) -> None:
|
||||
iframe_content = '<iframe src="https://w.soundcloud.com/player"></iframe>'
|
||||
html = '<![CDATA[{}]]>'.format(iframe_content)
|
||||
safe_html = get_safe_html(html)
|
||||
self.assertEqual(iframe_content, safe_html)
|
||||
stripped_html = strip_cdata(html)
|
||||
self.assertEqual(iframe_content, stripped_html)
|
||||
|
||||
|
||||
class OpenGraphParserTestCase(ZulipTestCase):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user