Skip to content

Commit

Permalink
Merge pull request #1 from sopython/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
Keiron Pizzey authored Jul 8, 2016
2 parents 1bee5bd + 366b5ff commit b3bad2e
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 25 deletions.
6 changes: 6 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
History
=======

0.2.0 (2016-07-08)
------------------

* Implemented basic eridu logic to access SE.
* Added CLI that will get content on a set schedule.

0.1.0 (2016-07-07)
------------------

Expand Down
5 changes: 4 additions & 1 deletion eridu/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,7 @@

__author__ = 'Keiron J. Pizzey'
__email__ = 'kjpizzey@gmail.com'
__version__ = '0.1.0'
__version__ = '0.2.0'


from eridu.core import get_post_ids, split_post_ids, get_questions, get_answers, filter_posts_by_tag
55 changes: 51 additions & 4 deletions eridu/cli.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,60 @@
# -*- coding: utf-8 -*-

import time

import click
import schedule

from eridu.logger import logger
from eridu.config import FILTER_TAGS, SECONDS_BETWEEN_REQUESTS
from eridu.core import get_post_ids, split_post_ids, get_questions, get_answers, filter_posts_by_tag

def _print_post(post):
    """Print *post*, falling back to an ASCII-escaped form if needed.

    ``print`` raises ``UnicodeEncodeError`` when the output stream's encoding
    cannot represent a character in the post. Instead of dropping the post
    silently, log the problem and print a backslash-escaped rendering.
    """
    try:
        print(post)
    except UnicodeEncodeError:
        logger.warning('Output encoding cannot represent a post; printing it escaped.')
        print(repr(post).encode('ascii', 'backslashreplace').decode('ascii'))


@click.command()
@click.option('--start', default=1, help="Starting page for historic results.")
def main(start):
    """Console script for eridu.

    Schedules a job that runs every ``SECONDS_BETWEEN_REQUESTS`` seconds.
    Each run fetches one page of post ids, splits them into question and
    answer ids, fetches the corresponding posts, keeps those carrying at
    least one tag from ``FILTER_TAGS`` and prints them. The page counter is
    advanced after every run. This function never returns; it loops forever
    driving the scheduler.

    :param start: page number to begin with.
    """
    logger.info('Starting main function in Eridu service.')

    # NOTE(review): a falsy ``start`` falls back to page 0; confirm the API
    # accepts that page number.
    page = start or 0
    # Mutable holder so the scheduled job can advance the page between runs.
    params = {'page': page}
    tags = [s.strip() for s in FILTER_TAGS.split(',')]

    logger.info('Starting on page {}'.format(page))
    logger.info('Filtering to include the following tags: {}.'.format(tags))

    def run(params):
        """Fetch, filter and print one page of posts, then move to the next page."""
        page = params.get('page')
        logger.info('Getting posts for page {}.'.format(page))

        post_ids = get_post_ids(page)
        ids = split_post_ids(post_ids['items'])

        # Skip the request when there is nothing to ask for: an empty id
        # list would be formatted into the URL as an empty id segment.
        if ids['question_ids']:
            questions = get_questions(ids['question_ids'])
            for question in filter_posts_by_tag(questions['items'], tags):
                _print_post(question)

        print('\n\n')

        if ids['answer_ids']:
            answers = get_answers(ids['answer_ids'])
            for answer in filter_posts_by_tag(answers['items'], tags):
                _print_post(answer)

        params['page'] += 1

    # The interval is a string when it comes from the environment; the
    # scheduler needs a number.
    schedule.every(int(SECONDS_BETWEEN_REQUESTS)).seconds.do(run, params=params)

    while True:
        schedule.run_pending()
        time.sleep(1)


if __name__ == "__main__":
Expand Down
24 changes: 24 additions & 0 deletions eridu/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os


# Every setting can be overridden through an environment variable; the second
# argument of os.environ.get() is the default used when the variable is unset.

# API key sent with every request; None when the variable is unset.
ACCESS_KEY = os.environ.get('STACKEXCHANGE_REQUESTS_KEY')
SITE = os.environ.get('ERIDU_SITE', 'stackoverflow')

# Settings for the post id listing request.
POST_IDS_URL = "https://api.stackexchange.com/2.2/posts"
# Environment values are always strings, so coerce to int to give the
# setting one type whether it is overridden or not.
POST_IDS_NUMBER = int(os.environ.get('ERIDU_POST_IDS_NUMBER', 100))
POST_IDS_FILTER = os.environ.get('ERIDU_POST_IDS_FILTER', '!3tz1WbZW5IHcz*twZ')
POST_IDS_SORT = os.environ.get('ERIDU_POST_IDS_SORT', 'creation')
POST_IDS_ORDER = os.environ.get('ERIDU_POST_IDS_ORDER', 'asc')

# Settings for the questions request; "{}" is replaced by the id list.
QUESTIONS_URL = "https://api.stackexchange.com/2.2/questions/{}"
QUESTIONS_FILTER = os.environ.get('ERIDU_QUESTIONS_FILTER', '!OfZM.T7F9gRuLlvhzHoyC1Fyd3oEOAMszsZJXvHk4mw')
QUESTIONS_SORT = os.environ.get('ERIDU_QUESTIONS_SORT', 'creation')
QUESTIONS_ORDER = os.environ.get('ERIDU_QUESTIONS_ORDER', 'asc')

# Settings for the answers request; "{}" is replaced by the id list.
ANSWERS_URL = "https://api.stackexchange.com/2.2/answers/{}"
ANSWERS_FILTER = os.environ.get('ERIDU_ANSWERS_FILTER', '!Fcazzsr2b3Mo6cWaRk)J*C-n25')
ANSWERS_SORT = os.environ.get('ERIDU_ANSWERS_SORT', 'creation')
ANSWERS_ORDER = os.environ.get('ERIDU_ANSWERS_ORDER', 'asc')

# Comma-separated list of tags a post must carry (at least one) to be kept.
FILTER_TAGS = os.environ.get('ERIDU_FILTER_TAGS', 'python,python-2.x,python-3.x')
# Must be an int: it is used as a scheduling interval in seconds, and a
# string taken straight from the environment cannot be used for that.
SECONDS_BETWEEN_REQUESTS = int(os.environ.get('ERIDU_SECONDS_BETWEEN_REQUESTS', 300))
109 changes: 109 additions & 0 deletions eridu/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import time

import requests

from eridu.logger import logger
from eridu.config import ACCESS_KEY, SITE
from eridu.config import POST_IDS_URL, POST_IDS_FILTER, POST_IDS_NUMBER, POST_IDS_SORT, POST_IDS_ORDER
from eridu.config import QUESTIONS_URL, QUESTIONS_FILTER, QUESTIONS_SORT, QUESTIONS_ORDER
from eridu.config import ANSWERS_URL, ANSWERS_FILTER, ANSWERS_SORT, ANSWERS_ORDER



def get_questions(question_ids, url=QUESTIONS_URL, filter=QUESTIONS_FILTER, access_key=ACCESS_KEY, site=SITE, sort=QUESTIONS_SORT, order=QUESTIONS_ORDER):
    """Fetch the questions with the given ids.

    :param question_ids: iterable of question ids; joined with ``;`` into the URL.
    :param url: URL template with one ``{}`` placeholder for the id list.
    :param filter: value sent as the ``filter`` request parameter.
    :param access_key: value sent as the ``key`` request parameter.
    :param site: value sent as the ``site`` request parameter.
    :param sort: value sent as the ``sort`` request parameter.
    :param order: value sent as the ``order`` request parameter.
    :returns: the decoded JSON response as a dict, or ``{'items': []}``
        without issuing a request when ``question_ids`` is empty.
    """
    question_ids = list(question_ids)
    if not question_ids:
        # With no ids the formatted URL would end in an empty id segment and
        # no longer address specific questions, so do not send it at all.
        logger.info('No question ids given; skipping the questions request.')
        return {'items': []}

    url = url.format(';'.join(str(i) for i in question_ids))

    payload = {
        "pagesize": 100,
        "key": access_key,
        "site": site,
        "sort": sort,
        "order": order,
        "filter": filter,
    }

    logger.info('Getting questions with payload: {}'.format(payload))

    r = requests.get(url, params=payload)

    data = r.json()

    # The response may ask the client to wait before the next request.
    backoff = data.get('backoff')
    if backoff is not None:
        time.sleep(int(backoff))

    return data

def get_answers(answer_ids, url=ANSWERS_URL, filter=ANSWERS_FILTER, access_key=ACCESS_KEY, site=SITE, sort=ANSWERS_SORT, order=ANSWERS_ORDER):
    """Fetch the answers with the given ids and attach their question's tags.

    Each returned answer gets a ``tags`` key copied from its parent question,
    which is looked up through :func:`get_questions`.

    :param answer_ids: iterable of answer ids; joined with ``;`` into the URL.
    :param url: URL template with one ``{}`` placeholder for the id list.
    :param filter: value sent as the ``filter`` request parameter.
    :param access_key: value sent as the ``key`` request parameter.
    :param site: value sent as the ``site`` request parameter.
    :param sort: value sent as the ``sort`` request parameter.
    :param order: value sent as the ``order`` request parameter.
    :returns: the decoded JSON response as a dict, or ``{'items': []}``
        without issuing a request when ``answer_ids`` is empty.
    """
    answer_ids = list(answer_ids)
    if not answer_ids:
        # With no ids the formatted URL would end in an empty id segment and
        # no longer address specific answers, so do not send it at all.
        logger.info('No answer ids given; skipping the answers request.')
        return {'items': []}

    url = url.format(';'.join(str(i) for i in answer_ids))

    payload = {
        "pagesize": 100,
        "key": access_key,
        "site": site,
        "sort": sort,
        "order": order,
        "filter": filter,
    }

    logger.info('Getting answers with payload: {}'.format(payload))

    r = requests.get(url, params=payload)

    data = r.json()

    items = data.get('items') or []
    if items:
        # Several answers can share a question, so ask for each one once.
        question_ids = sorted(set(answer['question_id'] for answer in items))

        # Look the questions up on the same site and with the same key as
        # the answers; the defaults may point somewhere else.
        questions = get_questions(question_ids, access_key=access_key, site=site)

        tags = {question['question_id']: question['tags'] for question in questions.get('items') or []}

        for answer in items:
            # A parent question missing from the lookup leaves the answer
            # untagged instead of aborting the whole batch.
            answer['tags'] = tags.get(answer['question_id'], [])

    # The response may ask the client to wait before the next request.
    backoff = data.get('backoff')
    if backoff is not None:
        time.sleep(int(backoff))

    return data


def get_post_ids(page, url=POST_IDS_URL, filter=POST_IDS_FILTER, n_posts=POST_IDS_NUMBER, access_key=ACCESS_KEY, site=SITE, sort=POST_IDS_SORT, order=POST_IDS_ORDER):
    """Fetch one page of posts from ``url``.

    :param page: value sent as the ``page`` request parameter.
    :param url: endpoint to request.
    :param filter: value sent as the ``filter`` request parameter.
    :param n_posts: value sent as the ``pagesize`` request parameter.
    :param access_key: value sent as the ``key`` request parameter.
    :param site: value sent as the ``site`` request parameter.
    :param sort: value sent as the ``sort`` request parameter.
    :param order: value sent as the ``order`` request parameter.
    :returns: the decoded JSON response as a dict.
    """
    payload = {
        "pagesize": n_posts,
        "page": page,
        "key": access_key,
        "site": site,
        "sort": sort,
        "order": order,
        "filter": filter,
    }
    logger.info('Getting post ids with payload: {}'.format(payload))

    response = requests.get(url, params=payload)
    body = response.json()

    # Sleep for as long as the response asks before returning to the caller.
    wait = body.get('backoff')
    if wait is not None:
        time.sleep(int(wait))

    return body


def split_post_ids(post_ids):
    """Separate post entries into question ids and answer ids.

    :param post_ids: iterable of dicts, each with ``post_type`` and ``post_id`` keys.
    :returns: dict with two lists, ``question_ids`` and ``answer_ids``, each
        in input order. Entries of any other post type are ignored.
    """
    logger.info('Splitting post ids into question and answer ids')

    questions = []
    answers = []

    for entry in post_ids:
        kind = entry['post_type']
        if kind == "question":
            target = questions
        elif kind == 'answer':
            target = answers
        else:
            # Neither a question nor an answer: nothing to record.
            continue
        target.append(entry['post_id'])

    return {"question_ids": questions, "answer_ids": answers}

def filter_posts_by_tag(posts, tags):
    """Return the posts that carry at least one of the wanted tags.

    :param posts: iterable of post dicts; a post's tags are read from its
        ``tags`` key. A post without that key (or with an empty value) is
        treated as untagged and therefore never matches.
    :param tags: iterable of tag names to keep.
    :returns: list of the matching posts, in input order.
    """
    wanted = set(tags)
    # isdisjoint() stops at the first shared tag and avoids building a new
    # set for every post.
    return [post for post in posts if not wanted.isdisjoint(post.get('tags') or ())]
1 change: 0 additions & 1 deletion eridu/eridu.py

This file was deleted.

11 changes: 11 additions & 0 deletions eridu/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import logging

# Package logger: records at INFO level and above are passed to its handler.
logger = logging.getLogger('eridu')
logger.setLevel(logging.INFO)

# Line layout: timestamp - logger name - level - message.
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

# Stream handler created with its default stream, using the layout above.
ch = logging.StreamHandler()
ch.setFormatter(formatter)
logger.addHandler(ch)
18 changes: 0 additions & 18 deletions setup.cfg

This file was deleted.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name='sopython-eridu',
version='0.1.0',
version='0.2.0',
description="Project Cradle library for accessing historic SO content.",
long_description=readme + '\n\n' + history,
author="Keiron J. Pizzey",
Expand Down

0 comments on commit b3bad2e

Please sign in to comment.