[manual] Add support for Embedly Extract

For sites that Embedly supports, we now fetch thumbnails for images and
embed code for videos, and use them in lieu of our existing embed code.

We also embed rich non-script content.

Special casing is done so that we don't embed images twice.

Some test cases were modified to avoid triggering Embed.ly.

The manual step is to install python-embedly.

(imported from commit d725bab91675c61953116c5ca741055fce49724e)
This commit is contained in:
Luke Faraone 2013-04-29 13:22:07 -07:00 committed by Tim Abbott
parent 8601c1670f
commit fac83ed2b7
3 changed files with 46 additions and 3 deletions

View File

@ -513,6 +513,8 @@ if DEPLOYED:
FILE_UPLOAD_MAX_MEMORY_SIZE = 0
EMBEDLY_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
if DEPLOYED:
S3_KEY="xxxxxxxxxxxxxxxxxxxx"
S3_SECRET_KEY="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

View File

@ -18,6 +18,9 @@ from zephyr.lib.bugdown import codehilite, fenced_code
from zephyr.lib.bugdown.fenced_code import FENCE_RE
from zephyr.lib.timeout import timeout, TimeoutExpired
from zephyr.lib.cache import cache_with_key
from embedly import Embedly
# Shared Embedly API client, built from settings.EMBEDLY_KEY.
# EmbedlyProcessor uses it to check whether a link is supported and to
# fetch the link's oEmbed data.
embedly_client = Embedly(settings.EMBEDLY_KEY)
# Format version of the bugdown rendering; stored along with rendered
# messages so that we can efficiently determine what needs to be re-rendered
@ -63,6 +66,39 @@ def add_a(root, url, link, height=None):
img.set("src", url)
class EmbedlyProcessor(markdown.treeprocessors.Treeprocessor):
    """Tree processor that appends Embedly previews for links in a message.

    For every ``<a href>`` in the rendered tree that Embedly reports as
    supported, the link's oEmbed data is fetched and one of the following
    is appended to ``root``:

    * the provider's embed HTML (``video``/``rich`` responses whose HTML
      does not mention "script"), stashed as raw HTML inside a new ``<p>``;
    * otherwise a thumbnail preview, added through ``add_a``.

    Every link that received a preview is appended to
    ``self.markdown.processed_hrefs`` so that other processors can tell it
    has already been handled.
    """

    def run(self, root):
        """Append Embedly previews for the links under ``root``.

        Returns ``root`` (modified in place).  A failure while talking to
        Embedly is logged and ends preview generation; it never prevents
        the message itself from rendering.
        """
        # Get all URLs from the blob
        urls = walk_tree(root, lambda e: e.get("href") if e.tag == "a" else None)

        for link in urls:
            if not embedly_client.is_supported(link):
                continue

            try:
                oembed_data = embedly_client.oembed(link, maxwidth=500)
            except Exception:
                # we put this in its own try-except because it requires external
                # connectivity. if embedly flakes out, we don't want to not-render
                # the entire message; we just want to not show the embedly preview.
                # `except Exception` (not a bare except) so KeyboardInterrupt and
                # SystemExit still propagate.
                logging.warning(traceback.format_exc())
                # NOTE(review): `break` also abandons the remaining links --
                # presumably to avoid repeated failures while Embedly is
                # unreachable; confirm this is intended.
                break

            embed_type = oembed_data["type"]
            # Plain equality: the previous `in ("link")` tested against a
            # string (no trailing comma), i.e. it was a substring match.
            if embed_type == "link":
                continue

            embed_html = None
            if embed_type in ("video", "rich"):
                try:
                    embed_html = oembed_data["html"]
                except KeyError:
                    # No embed code in the response; fall back to the
                    # thumbnail branch instead of failing the whole render.
                    pass

            # SECURITY NOTE: this HTML comes from a third party and is stashed
            # as "safe" (unescaped).  The substring test below is only a coarse
            # filter; it is case-insensitive because HTML tag names are.
            if embed_html is not None and "script" not in embed_html.lower():
                placeholder = self.markdown.htmlStash.store(embed_html, safe=True)
                el = markdown.util.etree.SubElement(root, "p")
                el.text = placeholder
            else:
                try:
                    thumbnail_url = oembed_data["thumbnail_url"]
                    thumbnail_height = oembed_data["thumbnail_height"]
                except KeyError:
                    # We didn't have a thumbnail, so let's just bail and keep on going...
                    continue
                # Called outside the try so a KeyError raised inside add_a
                # is not mistaken for a missing thumbnail.
                add_a(root, thumbnail_url, link, height=thumbnail_height)

            # Record the link as previewed.
            self.markdown.processed_hrefs.append(link)

        return root
class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):
def is_image(self, url):
parsed_url = urlparse.urlparse(url)
@ -111,6 +147,8 @@ class InlineImagePreviewProcessor(markdown.treeprocessors.Treeprocessor):
def run(self, root):
    """Append an inline preview for each image link found under ``root``.

    Links already listed in ``self.markdown.processed_hrefs`` are skipped,
    so that a link which has already been handled is not previewed twice.
    Returns ``root``.
    """
    for image_src, href in self.find_images(root):
        if href not in self.markdown.processed_hrefs:
            add_a(root, image_src, href)
    return root
@ -467,6 +505,9 @@ class Bugdown(markdown.Extension):
BugdownUListPreprocessor(md),
"_begin")
md.processed_hrefs = []
if not settings.DEPLOYED or settings.STAGING_DEPLOYED:
md.treeprocessors.add("embedly_processor", EmbedlyProcessor(md), "_end")
md.treeprocessors.add("inline_images", InlineImagePreviewProcessor(md), "_end")
md.treeprocessors.add("inline_interesting_links", InlineInterestingLinkProcessor(md), "_end")

View File

@ -2067,11 +2067,11 @@ int x = 3
('at https://humbughq.com/api. Check it!', "<p>at %s. Check it!</p>", 'https://humbughq.com/api'),
('goo.gl/abc', "<p>%s</p>", 'goo.gl/abc'),
('I spent a year at ucl.ac.uk', "<p>I spent a year at %s</p>", 'ucl.ac.uk'),
('http://d.pr/i/FMXO', "<p>%s</p>", 'http://d.pr/i/FMXO'),
('http://a.cc/i/FMXO', "<p>%s</p>", 'http://a.cc/i/FMXO'),
('http://fmota.eu/blog/test.html', "<p>%s</p>", 'http://fmota.eu/blog/test.html'),
('http://j.mp/14Hwm3X', "<p>%s</p>", 'http://j.mp/14Hwm3X'),
('http://localhost:9991/?show_debug=1', "<p>%s</p>", 'http://localhost:9991/?show_debug=1'),
('anyone before? (http://d.pr/i/FMXO)', "<p>anyone before? (%s)</p>", 'http://d.pr/i/FMXO'),
('anyone before? (http://a.cc/i/FMXO)', "<p>anyone before? (%s)</p>", 'http://a.cc/i/FMXO'),
('(http://en.wikipedia.org/wiki/Each-way_(bet))',
'<p>(%s)</p>', 'http://en.wikipedia.org/wiki/Each-way_(bet)'),
('(http://en.wikipedia.org/wiki/Each-way_(bet)_(more_parens))',
@ -2189,7 +2189,7 @@ xxxxxxx</strong></p>\n<p>xxxxxxx xxxxx xxxx xxxxx:<br>\n<code>xxxxxx</code>: xxx
msg = 'Check out the debate: http://www.youtube.com/watch?v=hx1mjT73xYE'
converted = convert(msg)
self.assertEqual(converted, '<p>Check out the debate: <a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE">http://www.youtube.com/watch?v=hx1mjT73xYE</a></p>\n<div class="message_inline_image"><a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE"><img src="http://i.ytimg.com/vi/hx1mjT73xYE/default.jpg"></a></div>')
self.assertEqual(converted, '<p>Check out the debate: <a href="http://www.youtube.com/watch?v=hx1mjT73xYE" target="_blank" title="http://www.youtube.com/watch?v=hx1mjT73xYE">http://www.youtube.com/watch?v=hx1mjT73xYE</a></p>\n<iframe width="500" height="281" src="http://www.youtube.com/embed/hx1mjT73xYE?feature=oembed" frameborder="0" allowfullscreen></iframe>')
def test_inline_dropbox(self):
msg = 'Look at how hilarious our old office was: https://www.dropbox.com/s/ymdijjcg67hv2ta/IMG_0923.JPG'