Source code for omnipresence.plugins.url

# -*- test-case-name: omnipresence.plugins.url.test_url -*-
"""Event plugins for previewing the content of mentioned URLs."""


import re

from littlebrother import TitleFetcher
from twisted.internet.defer import DeferredList

from ...plugin import EventPlugin
from ...web.http import IdentifyingAgent


# Based on django.utils.html.urlize from the Django project.
TRAILING_PUNCTUATION = [u'.', u',', u':', u';', u'.)', u'"', u"'", u'!']
WRAPPING_PUNCTUATION = [(u'(', u')'), (u'<', u'>'), (u'[', u']'),
                        (u'"', u'"'), (u"'", u"'")]
WORD_SPLIT_RE = re.compile(ur"""([\s<>"']+)""")
SIMPLE_URL_RE = re.compile(ur'^https?://\[?\w', re.IGNORECASE | re.UNICODE)


def extract_iris(text):
    """Return an iterator yielding IRIs from a Unicode string.

    *text* is split into words with ``WORD_SPLIT_RE``.  Every word that
    contains a ``.`` or ``:`` has its surrounding punctuation trimmed,
    and the remainder is yielded if it matches ``SIMPLE_URL_RE``, i.e.
    it looks like an ``http://`` or ``https://`` URL.  IRIs are yielded
    in the order they appear in *text*; duplicates are not removed.
    """
    for word in WORD_SPLIT_RE.split(text):
        # Cheap pre-filter: anything URL-like has a dot or a colon.
        if not (u'.' in word or u':' in word):
            continue
        candidate = word
        # Trim until a full pass changes nothing.  A single pass misses
        # punctuation that only becomes trailing once something else has
        # been trimmed, e.g. "http://example.com/..." or
        # "http://example.com/foo!)".  Every trim shortens the
        # candidate, so the loop always terminates.
        trimmed = True
        while trimmed:
            trimmed = False
            for punctuation in TRAILING_PUNCTUATION:
                if candidate.endswith(punctuation):
                    candidate = candidate[:-len(punctuation)]
                    trimmed = True
            for opening, closing in WRAPPING_PUNCTUATION:
                if candidate.startswith(opening):
                    candidate = candidate[len(opening):]
                    trimmed = True
                # Trim a closing character only when it is unbalanced,
                # so that URLs such as ".../wiki/Foo_(bar)" keep their
                # own parentheses.
                if (candidate.endswith(closing) and
                        candidate.count(closing) ==
                        candidate.count(opening) + 1):
                    candidate = candidate[:-len(closing)]
                    trimmed = True
        # Yield the resulting URL.
        if SIMPLE_URL_RE.match(candidate):
            yield candidate


class Default(EventPlugin):
    """Fetch the titles of URLs mentioned in normal messages or actions.
    Requires `Little Brother`__.

    __ https://github.com/kxz/littlebrother

    :charlie: http://www.example.com/ is an example site
    :bot: [www.example.com] Example Domain
    :alice: http://www.example.org/ and http://www.example.net/ too
    :bot: [www.example.org] Example Domain
    :bot: [www.example.net] Example Domain
    """

    def __init__(self):
        """Create the title fetcher and wrap its HTTP agent."""
        self.fetcher = TitleFetcher()
        # Wrap the fetcher's own agent in the project's IdentifyingAgent.
        self.fetcher.agent = IdentifyingAgent(self.fetcher.agent)

    def on_privmsg(self, msg):
        """Start a title fetch for every IRI found in *msg*.

        The message content is decoded using the message's encoding,
        with undecodable bytes replaced.  Returns a `DeferredList` that
        fires once all fetches have finished and `send_replies` has run.
        """
        fetches = []
        for iri in extract_iris(msg.content.decode(msg.encoding, 'replace')):
            self.log.debug(
                'Saw URL {iri} from {msg.actor} in venue {msg.venue}',
                iri=iri.encode('utf-8'), msg=msg)
            fetches.append(self.fetcher.fetch_title(
                iri, hostname_tag=True, friendly_errors=True))
        # send_replies logs every failure itself, so let the DeferredList
        # consume them.  Otherwise each failed fetch would also be
        # reported by Twisted as an unhandled error in its Deferred.
        finished = DeferredList(fetches, consumeErrors=True)
        finished.addCallback(self.send_replies, msg)
        return finished

    # Actions are handled exactly like normal messages.
    on_action = on_privmsg

    def send_replies(self, results, msg):
        """Reply with each fetched title and log each failed fetch.

        *results* is the list of ``(success, value)`` pairs produced by
        the `DeferredList` built in `on_privmsg`; *value* is the fetch
        result on success and the failure otherwise.
        """
        for success, value in results:
            if success:
                msg.connection.reply(value, msg)
            else:
                self.log.failure(
                    'Unhandled error during URL title extraction',
                    failure=value)