Skip to content
Open
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
html/
.DS_Store
venv
*.pyc
*.pyc
*~*
*#*
20 changes: 10 additions & 10 deletions request.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@
import requests

def get_session(email, password, host='https://www.hackerschool.com'):
    """Log in to the site and return an authenticated requests session.

    Parameters:
        email:    account email address used in the login form
        password: account password
        host:     base URL of the site (defaults to the production host)

    Returns:
        A requests session object carrying the logged-in cookies, suitable
        for making further authenticated requests.
    """
    s = requests.session()
    #host = 'http://localhost:5000'
    # Fetch the login page first so we can scrape the CSRF token out of its
    # <meta> tag; the server rejects form POSTs that do not echo it back.
    # http://en.wikipedia.org/wiki/Cross-site_request_forgery
    # NOTE(review): verify=False disables TLS certificate verification,
    # which permits man-in-the-middle attacks -- confirm this is intended.
    r = s.get(host + '/login', verify=False)
    m = re.search(r'<meta content="([a-zA-Z0-9/=+]+)" name="csrf-token"', r.content)
    # This exactly mimics the POST request that happens when you log in.
    payload = {
        'authenticity_token': m.group(1),
        'email': email,
        'password': password,
        'commit': 'Log In',
        'utf8': u'✓',
    }
    r = s.post(host + '/sessions', data=payload, verify=False)
    return s

def download_reflections_pages():
'''downloads all reflection pages and stores them in a folder called html'''
Expand Down
93 changes: 59 additions & 34 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pymongo import Connection
import json
import os
from collections import defaultdict

app = Flask(__name__)
connection = Connection()
Expand All @@ -27,57 +28,81 @@ def get_icon():
#described above.

def get_JSON(name):
    """Build the keyword-match tree for `name` and return it as a JSON string.

    For every keyword the person has used, finds other people who used the
    same keyword and scores each of them by a symmetric usage ratio; the
    per-keyword matches become child nodes of the tree consumed by the
    frontend visualization.

    Parameters:
        name: the person's name, as stored in the Mongo collection.

    Returns:
        A JSON string with keys 'name', 'children' (one node per keyword,
        each holding per-person match nodes) and, when at least one match
        exists, 'top' (the best-matching person overall).
    """
    # debug
    print("got request for " + name)

    # The tree that will be converted to JSON and passed to the frontend.
    match_data = {}
    match_data['name'] = name
    match_data['children'] = []

    doc = collection.find_one({'name': name})
    my_kws = doc['keywords'].keys()

    # Per-person project similarity: the running sum of keyword-usage ratios.
    weight_sums = defaultdict(float)

    # For each keyword `name` has used.
    for kw in my_kws:
        kw_data = {}
        kw_data['name'] = kw
        kw_data['children'] = []

        # How many times `name` has used kw; used to compute the weight ratio.
        name_times = doc['keywords'][kw]

        # All the other people who have used kw, sorted by how many times
        # they've used it (descending).
        matches = collection.find({'keywords.' + kw: {'$exists': True}}).sort('keywords.' + kw, -1)

        # The [:10] slice limits how many people to check against, in order
        # to reduce visual clutter on the final page.
        match_names = []
        for match in matches[:10]:
            mn = match['name']
            if mn != name:
                match_names.append(mn)

        # Make a child node for each person who used the keyword.
        for person in match_names:
            num_kw_matches = collection.find_one({'name': person})['keywords'][kw]

            # Weight is the smaller of the two usage ratios, making the score
            # symmetric between the two people and capped at 1.0.
            # (1.0 * forces float division under Python 2.)
            ratio = min(1.0 * name_times / num_kw_matches,
                        1.0 * num_kw_matches / name_times)

            # Sum of ratios, per person.
            weight_sums[person] += ratio

            # How the person's name will appear on the page.
            person_data = {}
            person_data['name'] = (person +
                                   " (" +
                                   str(ratio)[:5] +
                                   ")")
            # Add this entry to the people for this keyword.
            kw_data['children'].append(person_data)

        # Add this keyword's data to the tree.
        match_data['children'].append(kw_data)

    # Select the person with the highest total score. Guard against the case
    # where nobody else shares any keyword: max() on an empty mapping would
    # raise ValueError and turn the request into a 500.
    if weight_sums:
        match_data['top'] = max(weight_sums, key=weight_sums.get)

    return json.dumps(match_data)

if __name__ == '__main__':
    # Bind to localhost only; the PORT environment variable overrides the
    # default port 80.
    port = int(os.environ.get('PORT', 80))
    app.run(host='127.0.0.1', port=port)