From fac83ed2b71d6a0fe4902f95ebca2e1200d8dae0 Mon Sep 17 00:00:00 2001 From: Luke Faraone Date: Mon, 29 Apr 2013 13:22:07 -0700 Subject: [PATCH] [manual] Add support for Embedly Extract For sites that are supported, we now grab thumbnails for images + video embed code for videos and use them in lieu of our existing embed code. We also embed rich non-script content. Special casing is done so that we don't embed images twice. Some testcases were modified to avoid triggering Embed.ly The manual step is to install python-embedly. (imported from commit d725bab91675c61953116c5ca741055fce49724e) --- humbug/settings.py | 2 ++ zephyr/lib/bugdown/__init__.py | 41 ++++++++++++++++++++++++++++++++++ zephyr/tests.py | 6 ++--- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/humbug/settings.py b/humbug/settings.py index 713d2b454c..5b60a4d1ab 100644 --- a/humbug/settings.py +++ b/humbug/settings.py @@ -513,6 +513,8 @@ if DEPLOYED: FILE_UPLOAD_MAX_MEMORY_SIZE = 0 +EMBEDLY_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + if DEPLOYED: S3_KEY="xxxxxxxxxxxxxxxxxxxx" S3_SECRET_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" diff --git a/zephyr/lib/bugdown/__init__.py b/zephyr/lib/bugdown/__init__.py index 0114b2ea7c..75a41a4fed 100644 --- a/zephyr/lib/bugdown/__init__.py +++ b/zephyr/lib/bugdown/__init__.py @@ -18,6 +18,9 @@ from zephyr.lib.bugdown import codehilite, fenced_code from zephyr.lib.bugdown.fenced_code import FENCE_RE from zephyr.lib.timeout import timeout, TimeoutExpired from zephyr.lib.cache import cache_with_key +from embedly import Embedly + +embedly_client = Embedly(settings.EMBEDLY_KEY) # Format version of the bugdown rendering; stored along with rendered # messages so that we can efficiently determine what needs to be re-rendered @@ -63,6 +66,39 @@ def add_a(root, url, link, height=None): img.set("src", url) +class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor): + def run(self, root): + # Get all URLs from the blob + urls = walk_tree(root, lambda e:
e.get("href") if e.tag == "a" else None) + for link in urls: + if not embedly_client.is_supported(link): + continue + try: + oembed_data = embedly_client.oembed(link, maxwidth=500) + except Exception: + # we put this in its own try-except because it requires external + # connectivity. if embedly flakes out, we don't want to not-render + # the entire message; we just want to not show the embedly preview. + logging.warning(traceback.format_exc()) + break + if oembed_data["type"] == "link": + continue + elif oembed_data["type"] in ("video", "rich") and "script" not in oembed_data["html"]: + placeholder = self.markdown.htmlStash.store(oembed_data["html"], safe=True) + el = markdown.util.etree.SubElement(root, "p") + el.text = placeholder + else: + try: + add_a(root, + oembed_data["thumbnail_url"], + link, + height=oembed_data["thumbnail_height"]) + except KeyError: + # We didn't have a thumbnail, so let's just bail and keep on going... + continue + self.markdown.processed_hrefs.append(link) + return root + class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor): def is_image(self, url): parsed_url = urlparse.urlparse(url) @@ -111,6 +147,8 @@ class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor): def run(self, root): image_urls = self.find_images(root) for (url, link) in image_urls: + if link in self.markdown.processed_hrefs: + continue add_a(root, url, link) return root @@ -467,6 +505,9 @@ class Bugdown(markdown.Extension): BugdownUListPreprocessor(md), "_begin") + md.processed_hrefs = [] + if not settings.DEPLOYED or settings.STAGING_DEPLOYED: + md.treeprocessors.add("embedly_processor", EmbedlyProcessor(md), "_end") md.treeprocessors.add("inline_images", InlineImagePreviewProcessor(md), "_end") md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end") diff --git a/zephyr/tests.py b/zephyr/tests.py index 5c4a99eb04..f17dc6a9c4 100644 --- a/zephyr/tests.py +++ b/zephyr/tests.py @@ -2067,11 +2067,11 @@ int
x = 3 ('at https://humbughq.com/api. Check it!', "

at %s. Check it!

", 'https://humbughq.com/api'), ('goo.gl/abc', "

%s

", 'goo.gl/abc'), ('I spent a year at ucl.ac.uk', "

I spent a year at %s

", 'ucl.ac.uk'), - ('http://d.pr/i/FMXO', "

%s

", 'http://d.pr/i/FMXO'), + ('http://a.cc/i/FMXO', "

%s

", 'http://a.cc/i/FMXO'), ('http://fmota.eu/blog/test.html', "

%s

", 'http://fmota.eu/blog/test.html'), ('http://j.mp/14Hwm3X', "

%s

", 'http://j.mp/14Hwm3X'), ('http://localhost:9991/?show_debug=1', "

%s

", 'http://localhost:9991/?show_debug=1'), - ('anyone before? (http://d.pr/i/FMXO)', "

anyone before? (%s)

", 'http://d.pr/i/FMXO'), + ('anyone before? (http://a.cc/i/FMXO)', "

anyone before? (%s)

", 'http://a.cc/i/FMXO'), ('(http://en.wikipedia.org/wiki/Each-way_(bet))', '

(%s)

', 'http://en.wikipedia.org/wiki/Each-way_(bet)'), ('(http://en.wikipedia.org/wiki/Each-way_(bet)_(more_parens))', @@ -2189,7 +2189,7 @@ xxxxxxx

\n

xxxxxxx xxxxx xxxx xxxxx:
\nxxxxxx: xxx msg = 'Check out the debate: http://www.youtube.com/watch?v=hx1mjT73xYE' converted = convert(msg) - self.assertEqual(converted, '

Check out the debate: http://www.youtube.com/watch?v=hx1mjT73xYE

\n
') + self.assertEqual(converted, '

Check out the debate: http://www.youtube.com/watch?v=hx1mjT73xYE

\n') def test_inline_dropbox(self): msg = 'Look at how hilarious our old office was: https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG'