[manual] Add support for Embedly Extract

For sites that are supported, we now grab thumbnails for images + video embed code for videos and use them in lieu of our existing embed code. We also embed rich non-script content. Special casing is done so that we don't embed images twice. Some test cases were modified to avoid triggering Embed.ly. The manual step is to install python-embedly. (imported from commit d725bab91675c61953116c5ca741055fce49724e)
2026-07-03 21:10:12 +08:00 · 2013-04-29 13:22:07 -07:00 · 2013-04-29 13:22:07 -07:00 · fac83ed2b7
commit fac83ed2b7
parent 8601c1670f
3 changed files with 46 additions and 3 deletions
--- a/humbug/settings.py
+++ b/humbug/settings.py
@@ -513,6 +513,8 @@ if DEPLOYED:

 FILE_UPLOAD_MAX_MEMORY_SIZE = 0

+EMBEDLY_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
 if DEPLOYED:
    S3_KEY="xxxxxxxxxxxxxxxxxxxx"
    S3_SECRET_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
--- a/zephyr/lib/bugdown/__init__.py
+++ b/zephyr/lib/bugdown/__init__.py
@@ -18,6 +18,9 @@ from zephyr.lib.bugdown import codehilite, fenced_code
 from zephyr.lib.bugdown.fenced_code import FENCE_RE
 from zephyr.lib.timeout import timeout, TimeoutExpired
 from zephyr.lib.cache import cache_with_key
+from embedly import Embedly
+
+embedly_client = Embedly(settings.EMBEDLY_KEY)

 # Format version of the bugdown rendering; stored along with rendered
 # messages so that we can efficiently determine what needs to be re-rendered
@@ -63,6 +66,39 @@ def add_a(root, url, link, height=None):
    img.set("src", url)


+class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor):
+    def run(self, root):
+        # Get all URLs from the blob
+        urls = walk_tree(root, lambda e: e.get("href") if e.tag == "a" else None)
+        for link in urls:
+            if not embedly_client.is_supported(link):
+                continue
+            try:
+                oembed_data = embedly_client.oembed(link, maxwidth=500)
+            except:
+                # we put this in its own try-except because it requires external
+                # connectivity. if embedly flakes out, we don't want to not-render
+                # the entire message; we just want to not show the embedly preview.
+                logging.warning(traceback.format_exc())
+                break
+            if oembed_data["type"] in ("link"):
+                continue
+            elif oembed_data["type"] in ("video", "rich") and "script" not in oembed_data["html"]:
+                placeholder = self.markdown.htmlStash.store(oembed_data["html"], safe=True)
+                el = markdown.util.etree.SubElement(root, "p")
+                el.text = placeholder
+            else:
+                try:
+                    add_a(root,
+                          oembed_data["thumbnail_url"],
+                          link,
+                          height=oembed_data["thumbnail_height"])
+                except KeyError:
+                    # We didn't have a thumbnail, so let's just bail and keep on going...
+                    continue
+            self.markdown.processed_hrefs.append(link)
+        return root
+
 class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):
    def is_image(self, url):
        parsed_url = urlparse.urlparse(url)
@@ -111,6 +147,8 @@ class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):
    def run(self, root):
        image_urls = self.find_images(root)
        for (url, link) in image_urls:
+            if link in self.markdown.processed_hrefs:
+                continue
            add_a(root, url, link)

        return root
@@ -467,6 +505,9 @@ class Bugdown(markdown.Extension):
                                 BugdownUListPreprocessor(md),
                                 "_begin")

+        md.processed_hrefs = []
+        if not settings.DEPLOYED or settings.STAGING_DEPLOYED:
+            md.treeprocessors.add("embedly_processor", EmbedlyProcessor(md), "_end")
        md.treeprocessors.add("inline_images", InlineImagePreviewProcessor(md), "_end")
        md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")

--- a/zephyr/tests.py
+++ b/zephyr/tests.py
@@ -2067,11 +2067,11 @@ int x = 3
         ('at https://humbughq.com/api. Check it!',    "<p>at %s. Check it!</p>",           'https://humbughq.com/api'),
         ('goo.gl/abc',                                "<p>%s</p>",                         'goo.gl/abc'),
         ('I spent a year at ucl.ac.uk',               "<p>I spent a year at %s</p>",       'ucl.ac.uk'),
-         ('http://d.pr/i/FMXO',                        "<p>%s</p>",                         'http://d.pr/i/FMXO'),
+         ('http://a.cc/i/FMXO',                        "<p>%s</p>",                         'http://a.cc/i/FMXO'),
         ('http://fmota.eu/blog/test.html',            "<p>%s</p>",                         'http://fmota.eu/blog/test.html'),
         ('http://j.mp/14Hwm3X',                       "<p>%s</p>",                         'http://j.mp/14Hwm3X'),
         ('http://localhost:9991/?show_debug=1',       "<p>%s</p>",                         'http://localhost:9991/?show_debug=1'),
-         ('anyone before? (http://d.pr/i/FMXO)',       "<p>anyone before? (%s)</p>",        'http://d.pr/i/FMXO'),
+         ('anyone before? (http://a.cc/i/FMXO)',       "<p>anyone before? (%s)</p>",        'http://a.cc/i/FMXO'),
         ('(http://en.wikipedia.org/wiki/Each-way_(bet))',
            '<p>(%s)</p>',                   'http://en.wikipedia.org/wiki/Each-way_(bet)'),
         ('(http://en.wikipedia.org/wiki/Each-way_(bet)_(more_parens))',
@@ -2189,7 +2189,7 @@ xxxxxxx</strong></p>\n<p>xxxxxxx xxxxx xxxx xxxxx:<br>\n<code>xxxxxx</code>: xxx
        msg = 'Check out the debate: http://www.youtube.com/watch?v=hx1mjT73xYE'
        converted = convert(msg)

-        self.assertEqual(converted, '<p>Check out the debate: <a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE">http://www.youtube.com/watch?v=hx1mjT73xYE</a></p>\n<div class="message_inline_image"><a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE"><img src="http://i.ytimg.com/vi/hx1mjT73xYE/default.jpg"></a></div>')
+        self.assertEqual(converted, '<p>Check out the debate: <a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE">http://www.youtube.com/watch?v=hx1mjT73xYE</a></p>\n<iframe width="500" height="281" src="http://www.youtube.com/embed/hx1mjT73xYE?feature=oembed" frameborder="0" allowfullscreen></iframe>')

    def test_inline_dropbox(self):
        msg = 'Look at how hilarious our old office was: https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG'