[xkcd] Use JSON API, add negative indexing

maxpowa · Nov 1, 2013 · 4613e22 · 4613e22
1 parent b1e9654
commit 4613e22
Showing 1 changed file with 69 additions and 35 deletions.
diff --git a/xkcd.py b/xkcd.py
@@ -2,66 +2,100 @@
 xkcd.py - XKCD Module
 Copyright 2010, Michael Yanovich (yanovich.net), and Morgan Goose
 Copyright 2012, Lior Ramati
+Copyright 2013, Edward Powell (embolalia.com)
 Licensed under the Eiffel Forum License 2.
 
 http://willie.dftba.net
 """
 
+import json
 import random
+import re
+from willie import web
 from willie.modules.search import google_search
-from willie.modules.url import find_title
 from willie.module import commands
-import urllib2
-from lxml import etree
-import re
+
+# xkcd.com subdomains that are excluded from the Google query built below.
+ignored_sites = [
+    'almamater.xkcd.com',
+    'blog.xkcd.com',
+    'blag.xkcd.com',
+    'forums.xkcd.com',
+    'fora.xkcd.com',
+    'forums3.xkcd.com',
+    'store.xkcd.com',
+    'wiki.xkcd.com',
+    'what-if.xkcd.com',
+]
+# Suffix appended to a search query: restrict results to xkcd.com, then
+# subtract every subdomain listed in ignored_sites.
+sites_query = ''.join(
+    (' site:xkcd.com -site:', ' -site:'.join(ignored_sites)))
+
+
+def get_info(number=None):
+    """Fetch and parse the xkcd JSON record for one comic.
+
+    If `number` is falsy (None, or 0) the record of the latest comic is
+    requested; otherwise the record of comic `number` is requested.
+
+    Returns the decoded JSON object with an extra 'url' key holding the
+    comic's page address, built from the record's own 'num' field.
+    """
+    url = ('http://xkcd.com/{}/info.0.json'.format(number) if number
+           else 'http://xkcd.com/info.0.json')
+    info = json.loads(web.get(url))
+    info['url'] = 'http://xkcd.com/' + str(info['num'])
+    return info
+
+
+def google(query):
+    """Search Google for `query` on xkcd.com and return a comic number.
+
+    The query is combined with sites_query so that only xkcd.com itself is
+    searched. Returns the comic number as a string of digits when the first
+    result is a comic page, or None when the search produced nothing usable.
+    """
+    try:
+        query = query.encode('utf-8')
+    except UnicodeError:
+        # Best effort: keep the query as it was if it cannot be re-encoded.
+        pass
+    url = google_search(query + sites_query)
+    if not url:
+        # The search yielded no result; re.match would raise on a non-string.
+        return None
+    match = re.match(r'(?:https?://)?xkcd\.com/(\d+)/?', url)
+    if match:
+        return match.group(1)
+    return None
 
 
 @commands('xkcd')
 def xkcd(bot, trigger):
     """
     .xkcd - Finds an xkcd comic strip. Takes one of 3 inputs:
     If no input is provided it will return a random comic
-    If numeric input is provided it will return that comic
+    If numeric input is provided it will return that comic, or the nth-latest
+    comic if the number is non-positive
     If non-numeric input is provided it will return the first google result for those keywords on the xkcd.com site
     """
     # get latest comic for rand function and numeric input
-    body = urllib2.urlopen("http://xkcd.com/rss.xml").readlines()[1]
-    parsed = etree.fromstring(body)
-    newest = etree.tostring(parsed.findall("channel/item/link")[0])
-    max_int = int(newest.split("/")[-3])
+    latest = get_info()
+    max_int = latest['num']
 
     # if no input is given (pre - lior's edits code)
     if not trigger.group(2):  # get rand comic
         random.seed()
-        website = "http://xkcd.com/%d/" % random.randint(0, max_int + 1)
+        # randint is inclusive at both ends: draw from 1..max_int so that
+        # neither 0 (treated as "latest" by get_info) nor the not-yet-posted
+        # comic max_int + 1 can be requested.
+        requested = get_info(random.randint(1, max_int))
     else:
         query = trigger.group(2).strip()
 
-        # numeric input! get that comic number if it exists
-        if (query.isdigit()):
-            if (int(query) > max_int):
-                bot.say("Sorry, comic #" + query + " hasn't been posted yet. The last comic was #%d" % max_int)
+        # Positive or 0; get given number or latest
+        if query.isdigit():
+            query = int(query)
+            if query > max_int:
+                bot.say(("Sorry, comic #{} hasn't been posted yet. "
+                         "The last comic was #{}").format(query, max_int))
                 return
+            elif query == 0:
+                requested = latest
             else:
-                website = "http://xkcd.com/" + query
-
-        # non-numeric input! code lifted from search.g
+                requested = get_info(query)
+        # Negative: go back that many from current
+        # (startswith avoids an IndexError on an empty query)
+        elif query.startswith('-') and query[1:].isdigit():
+            query = int(query[1:])
+            if query >= max_int:
+                # Going back this far would land on comic 0 or below.
+                bot.say(("Sorry, can't go back {} comics. "
+                         "The last comic was #{}").format(query, max_int))
+                return
+            requested = get_info(max_int - query)
+        # Non-number: google.
         else:
-            if (query.lower() == "latest" or query.lower() == "newest"):  # special commands
-                website = "https://xkcd.com/"
-            else:  # just google
-                try:
-                    query = query.encode('utf-8')
-                except:
-                    pass
-                website = google_search("site:xkcd.com " + query)
-                chkForum = re.match(re.compile(r'.*?([0-9].*?):.*'), find_title(website))  # regex for comic specific forum threads
-                if (chkForum):
-                    website = "http://xkcd.com/" + chkForum.groups()[0].lstrip('0')
-    if website:  # format and say result
-        website += ' [' + find_title(website)[6:] + ']'
-        bot.say(website)
-    elif website is False:
-        bot.say("Problem getting data from Google.")
-    else:
-        bot.say("No results found for '%s'." % query)
+            if (query.lower() == "latest" or query.lower() == "newest"):
+                requested = latest
+            else:
+                number = google(query)
+                if not number:
+                    bot.say('Could not find any comics for that query.')
+                    return
+                requested = get_info(number)
+
+    # Every surviving path has set `requested` to a comic record by now.
+    message = '{} [{}]'.format(requested['url'], requested['title'])
+    bot.say(message)