From fac83ed2b71d6a0fe4902f95ebca2e1200d8dae0 Mon Sep 17 00:00:00 2001
From: Luke Faraone <lfaraone@humbughq.com>
Date: Mon, 29 Apr 2013 13:22:07 -0700
Subject: [PATCH] [manual] Add support for Embedly Extract

For supported sites, we now fetch thumbnails (for images) and embed code
(for videos) from Embedly and use them in lieu of our existing embed code.

We also embed rich non-script content.

Special casing is done so that we don't embed images twice.

Some test cases were modified to avoid triggering Embed.ly.

The manual step is to install python-embedly.

(imported from commit d725bab91675c61953116c5ca741055fce49724e)
---
 humbug/settings.py             |  2 ++
 zephyr/lib/bugdown/__init__.py | 41 ++++++++++++++++++++++++++++++++++
 zephyr/tests.py                |  6 ++---
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/humbug/settings.py b/humbug/settings.py
index 713d2b454c..5b60a4d1ab 100644
--- a/humbug/settings.py
+++ b/humbug/settings.py
@@ -513,6 +513,8 @@ if DEPLOYED:
 
 FILE_UPLOAD_MAX_MEMORY_SIZE = 0
 
+EMBEDLY_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
 if DEPLOYED:
     S3_KEY="xxxxxxxxxxxxxxxxxxxx"
     S3_SECRET_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
diff --git a/zephyr/lib/bugdown/__init__.py b/zephyr/lib/bugdown/__init__.py
index 0114b2ea7c..75a41a4fed 100644
--- a/zephyr/lib/bugdown/__init__.py
+++ b/zephyr/lib/bugdown/__init__.py
@@ -18,6 +18,9 @@ from zephyr.lib.bugdown import codehilite, fenced_code
 from zephyr.lib.bugdown.fenced_code import FENCE_RE
 from zephyr.lib.timeout import timeout, TimeoutExpired
 from zephyr.lib.cache import cache_with_key
+from embedly import Embedly
+
+embedly_client = Embedly(settings.EMBEDLY_KEY)
 
 # Format version of the bugdown rendering; stored along with rendered
 # messages so that we can efficiently determine what needs to be re-rendered
@@ -63,6 +66,39 @@ def add_a(root, url, link, height=None):
     img.set("src", url)
 
 
+class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor):
+    """Append an Embedly preview to the tree for each supported link."""
+    def run(self, root):
+        # Start clean so hrefs recorded by an earlier run cannot suppress
+        # the inline image preview for a link Embedly skipped this time.
+        self.markdown.processed_hrefs = []
+        urls = walk_tree(root, lambda e: e.get("href") if e.tag == "a" else None)
+        for link in urls:
+            if not embedly_client.is_supported(link):
+                continue
+            try:
+                oembed_data = embedly_client.oembed(link, maxwidth=500)
+            except Exception:
+                # Needs external connectivity: if Embedly flakes out, give up
+                # on previews but still render the rest of the message.
+                logging.warning(traceback.format_exc())
+                break
+            # Exact match: ("link") is a plain string, so `in` would substring-match.
+            if oembed_data["type"] == "link":
+                continue
+            elif oembed_data["type"] in ("video", "rich") and "script" not in oembed_data["html"]:
+                placeholder = self.markdown.htmlStash.store(oembed_data["html"], safe=True)
+                el = markdown.util.etree.SubElement(root, "p")
+                el.text = placeholder
+            else:
+                try:
+                    add_a(root, oembed_data["thumbnail_url"], link,
+                          height=oembed_data["thumbnail_height"])
+                except KeyError:
+                    continue  # No thumbnail for this link; try the next one.
+            self.markdown.processed_hrefs.append(link)
+        return root
+
 class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):
     def is_image(self, url):
         parsed_url = urlparse.urlparse(url)
@@ -111,6 +147,8 @@ class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):
     def run(self, root):
         image_urls = self.find_images(root)
         for (url, link) in image_urls:
+            if link in self.markdown.processed_hrefs:
+                continue
             add_a(root, url, link)
 
         return root
@@ -467,6 +505,9 @@ class Bugdown(markdown.Extension):
                                  BugdownUListPreprocessor(md),
                                  "_begin")
 
+        md.processed_hrefs = []
+        if not settings.DEPLOYED or settings.STAGING_DEPLOYED:
+            md.treeprocessors.add("embedly_processor", EmbedlyProcessor(md), "_end")
         md.treeprocessors.add("inline_images", InlineImagePreviewProcessor(md), "_end")
         md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")
 
diff --git a/zephyr/tests.py b/zephyr/tests.py
index 5c4a99eb04..f17dc6a9c4 100644
--- a/zephyr/tests.py
+++ b/zephyr/tests.py
@@ -2067,11 +2067,11 @@ int x = 3
          ('at https://humbughq.com/api. Check it!',    "<p>at %s. Check it!</p>",           'https://humbughq.com/api'),
          ('goo.gl/abc',                                "<p>%s</p>",                         'goo.gl/abc'),
          ('I spent a year at ucl.ac.uk',               "<p>I spent a year at %s</p>",       'ucl.ac.uk'),
-         ('http://d.pr/i/FMXO',                        "<p>%s</p>",                         'http://d.pr/i/FMXO'),
+         ('http://a.cc/i/FMXO',                        "<p>%s</p>",                         'http://a.cc/i/FMXO'),
          ('http://fmota.eu/blog/test.html',            "<p>%s</p>",                         'http://fmota.eu/blog/test.html'),
          ('http://j.mp/14Hwm3X',                       "<p>%s</p>",                         'http://j.mp/14Hwm3X'),
          ('http://localhost:9991/?show_debug=1',       "<p>%s</p>",                         'http://localhost:9991/?show_debug=1'),
-         ('anyone before? (http://d.pr/i/FMXO)',       "<p>anyone before? (%s)</p>",        'http://d.pr/i/FMXO'),
+         ('anyone before? (http://a.cc/i/FMXO)',       "<p>anyone before? (%s)</p>",        'http://a.cc/i/FMXO'),
          ('(http://en.wikipedia.org/wiki/Each-way_(bet))',
             '<p>(%s)</p>',                   'http://en.wikipedia.org/wiki/Each-way_(bet)'),
          ('(http://en.wikipedia.org/wiki/Each-way_(bet)_(more_parens))',
@@ -2189,7 +2189,7 @@ xxxxxxx</strong></p>\n<p>xxxxxxx xxxxx xxxx xxxxx:<br>\n<code>xxxxxx</code>: xxx
         msg = 'Check out the debate: http://www.youtube.com/watch?v=hx1mjT73xYE'
         converted = convert(msg)
 
-        self.assertEqual(converted, '<p>Check out the debate: <a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE">http://www.youtube.com/watch?v=hx1mjT73xYE</a></p>\n<div class="message_inline_image"><a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE"><img src="http://i.ytimg.com/vi/hx1mjT73xYE/default.jpg"></a></div>')
+        self.assertEqual(converted, '<p>Check out the debate: <a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE">http://www.youtube.com/watch?v=hx1mjT73xYE</a></p>\n<iframe width="500" height="281" src="http://www.youtube.com/embed/hx1mjT73xYE?feature=oembed" frameborder="0" allowfullscreen></iframe>')
 
     def test_inline_dropbox(self):
         msg = 'Look at how hilarious our old office was: https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG'