From fac83ed2b71d6a0fe4902f95ebca2e1200d8dae0 Mon Sep 17 00:00:00 2001
From: Luke Faraone
Date: Mon, 29 Apr 2013 13:22:07 -0700
Subject: [PATCH] [manual] Add support for Embedly Extract
For supported sites, we now grab thumbnails for images and embed code for
videos, and use them in lieu of our existing embed code.
We also embed rich non-script content.
Special casing is done so that we don't embed images twice.
Some test cases were modified to avoid triggering Embed.ly.
The manual step is to install python-embedly.
(imported from commit d725bab91675c61953116c5ca741055fce49724e)
---
humbug/settings.py | 2 ++
zephyr/lib/bugdown/__init__.py | 41 ++++++++++++++++++++++++++++++++++
zephyr/tests.py | 6 ++---
3 files changed, 46 insertions(+), 3 deletions(-)
diff --git a/humbug/settings.py b/humbug/settings.py
index 713d2b454c..5b60a4d1ab 100644
--- a/humbug/settings.py
+++ b/humbug/settings.py
@@ -513,6 +513,8 @@ if DEPLOYED:
FILE_UPLOAD_MAX_MEMORY_SIZE = 0
+EMBEDLY_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
if DEPLOYED:
S3_KEY="xxxxxxxxxxxxxxxxxxxx"
S3_SECRET_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
diff --git a/zephyr/lib/bugdown/__init__.py b/zephyr/lib/bugdown/__init__.py
index 0114b2ea7c..75a41a4fed 100644
--- a/zephyr/lib/bugdown/__init__.py
+++ b/zephyr/lib/bugdown/__init__.py
@@ -18,6 +18,9 @@ from zephyr.lib.bugdown import codehilite, fenced_code
from zephyr.lib.bugdown.fenced_code import FENCE_RE
from zephyr.lib.timeout import timeout, TimeoutExpired
from zephyr.lib.cache import cache_with_key
+from embedly import Embedly
+
+embedly_client = Embedly(settings.EMBEDLY_KEY)
# Format version of the bugdown rendering; stored along with rendered
# messages so that we can efficiently determine what needs to be re-rendered
@@ -63,6 +66,39 @@ def add_a(root, url, link, height=None):
img.set("src", url)
+class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor):
+ def run(self, root):
+ # Get all URLs from the blob
+ urls = walk_tree(root, lambda e: e.get("href") if e.tag == "a" else None)
+ for link in urls:
+ if not embedly_client.is_supported(link):
+ continue
+ try:
+ oembed_data = embedly_client.oembed(link, maxwidth=500)
+ except:
+ # we put this in its own try-except because it requires external
+ # connectivity. if embedly flakes out, we don't want to not-render
+ # the entire message; we just want to not show the embedly preview.
+ logging.warning(traceback.format_exc())
+ break
+ if oembed_data["type"] in ("link"):
+ continue
+ elif oembed_data["type"] in ("video", "rich") and "script" not in oembed_data["html"]:
+ placeholder = self.markdown.htmlStash.store(oembed_data["html"], safe=True)
+ el = markdown.util.etree.SubElement(root, "p")
+ el.text = placeholder
+ else:
+ try:
+ add_a(root,
+ oembed_data["thumbnail_url"],
+ link,
+ height=oembed_data["thumbnail_height"])
+ except KeyError:
+ # We didn't have a thumbnail, so let's just bail and keep on going...
+ continue
+ self.markdown.processed_hrefs.append(link)
+ return root
+
class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):
def is_image(self, url):
parsed_url = urlparse.urlparse(url)
@@ -111,6 +147,8 @@ class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):
def run(self, root):
image_urls = self.find_images(root)
for (url, link) in image_urls:
+ if link in self.markdown.processed_hrefs:
+ continue
add_a(root, url, link)
return root
@@ -467,6 +505,9 @@ class Bugdown(markdown.Extension):
BugdownUListPreprocessor(md),
"_begin")
+ md.processed_hrefs = []
+ if not settings.DEPLOYED or settings.STAGING_DEPLOYED:
+ md.treeprocessors.add("embedly_processor", EmbedlyProcessor(md), "_end")
md.treeprocessors.add("inline_images", InlineImagePreviewProcessor(md), "_end")
md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")
diff --git a/zephyr/tests.py b/zephyr/tests.py
index 5c4a99eb04..f17dc6a9c4 100644
--- a/zephyr/tests.py
+++ b/zephyr/tests.py
@@ -2067,11 +2067,11 @@ int x = 3
('at https://humbughq.com/api. Check it!', "
at %s. Check it!
", 'https://humbughq.com/api'),
('goo.gl/abc', "
%s
", 'goo.gl/abc'),
('I spent a year at ucl.ac.uk', "