Skip to content

Commit

Permalink
[xkcd] Use JSON API, add negative indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
embolalia committed Nov 1, 2013
1 parent b1e9654 commit 4613e22
Showing 1 changed file with 69 additions and 35 deletions.
104 changes: 69 additions & 35 deletions xkcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,66 +2,100 @@
xkcd.py - XKCD Module
Copyright 2010, Michael Yanovich (yanovich.net), and Morgan Goose
Copyright 2012, Lior Ramati
Copyright 2013, Edward Powell (embolalia.com)
Licensed under the Eiffel Forum License 2.
http://willie.dftba.net
"""

import json
import random
import re
from willie import web
from willie.modules.search import google_search
from willie.modules.url import find_title
from willie.module import commands
import urllib2
from lxml import etree
import re

# xkcd.com subdomains that host something other than comics. Each one is
# excluded from the Google search so that only comic pages can match.
ignored_sites = [
    'almamater.xkcd.com',
    'blog.xkcd.com',
    'blag.xkcd.com',
    'forums.xkcd.com',
    'fora.xkcd.com',
    'forums3.xkcd.com',
    'store.xkcd.com',
    'wiki.xkcd.com',
    'what-if.xkcd.com',
]
# Suffix appended to the user's keywords: restrict results to xkcd.com, then
# add one "-site:" exclusion per ignored subdomain.
sites_query = ' site:xkcd.com' + ''.join(
    ' -site:' + host for host in ignored_sites
)


def get_info(number=None):
    """Fetch a comic's metadata from the xkcd JSON API.

    :param number: comic number to look up. Any falsy value (``None``, ``0``,
        an empty string) requests the site-wide ``info.0.json``, which the
        caller treats as the latest comic.
    :return: the decoded JSON object, with an extra ``'url'`` key holding the
        comic's page address built from its ``'num'`` field.
    """
    # NOTE(review): assumes web.get returns the response body as text that
    # json.loads can parse -- confirm against willie.web.
    url = ('http://xkcd.com/{}/info.0.json'.format(number) if number
           else 'http://xkcd.com/info.0.json')
    comic = json.loads(web.get(url))
    comic['url'] = 'http://xkcd.com/' + str(comic['num'])
    return comic


def google(query):
    """Search Google for an xkcd comic and return its number.

    The search is restricted to xkcd.com (minus the ignored subdomains) by
    appending ``sites_query`` to the keywords.

    :param query: the search keywords.
    :return: the comic number as a string of digits, or ``None`` when the
        search produced no result or the result is not a comic page.
    """
    try:
        query = query.encode('utf-8')
    except UnicodeError:
        # Best effort: the query could not be re-encoded (e.g. it is already
        # a byte string with non-ASCII data), so search with it unchanged.
        pass
    url = google_search(query + sites_query)
    if not url:
        # google_search yielded nothing usable; re.match would raise a
        # TypeError on a non-string, so bail out before matching.
        return None
    # Raw string so "\d" is a regex class, and escaped dot so only the
    # literal host "xkcd.com" matches.
    match = re.match(r'(?:https?://)?xkcd\.com/(\d+)/?', url)
    if match:
        return match.group(1)
    return None


@commands('xkcd')
def xkcd(bot, trigger):
    """
    .xkcd - Finds an xkcd comic strip. Takes one of 4 inputs:
    If no input is provided it will return a random comic
    If a positive number is provided it will return that comic
    If 0 or a negative number is provided it will return the latest comic, or
    the comic that many places before the latest
    If non-numeric input is provided it will return the first google result
    for those keywords on the xkcd.com site ("latest" and "newest" return the
    latest comic)
    """
    # The latest comic supplies the upper bound for random and numeric input.
    latest = get_info()
    max_int = latest['num']

    # A missing or whitespace-only argument both mean "no input".
    query = (trigger.group(2) or '').strip()

    if not query:
        # randint is inclusive on both ends, so the valid range is
        # 1..max_int; 0 and max_int + 1 are not comics.
        requested = get_info(random.randint(1, max_int))
    # Positive or 0: get the given number, or the latest for 0.
    elif query.isdigit():
        number = int(query)
        if number > max_int:
            bot.say(("Sorry, comic #{} hasn't been posted yet. "
                     "The last comic was #{}").format(number, max_int))
            return
        requested = get_info(number) if number else latest
    # Negative: go back that many from the latest comic.
    elif query[0] == '-' and query[1:].isdigit():
        offset = int(query[1:])
        number = max_int - offset
        if number < 1:
            # Going back further than the number of comics would request a
            # zero or negative comic number.
            bot.say(("Sorry, there are only {} comics, "
                     "so I can't go back {}.").format(max_int, offset))
            return
        requested = get_info(number)
    # Special keywords for the latest comic.
    elif query.lower() in ('latest', 'newest'):
        requested = latest
    # Anything else: google it.
    else:
        number = google(query)
        if not number:
            bot.say('Could not find any comics for that query.')
            return
        requested = get_info(number)

    bot.say('{} [{}]'.format(requested['url'], requested['title']))

0 comments on commit 4613e22

Please sign in to comment.