Skip to content
Open
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
html/
.DS_Store
venv
*.pyc
*.pyc
*~*
*#*
20 changes: 10 additions & 10 deletions request.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@
import requests

def get_session(email, password, host='https://www.hackerschool.com'):
    """Log in to the site and return an authenticated requests session.

    Parameters:
        email:    account email address used in the login form
        password: account password
        host:     base URL of the site (defaults to the production host)

    Returns:
        A requests session object carrying the logged-in cookies, suitable
        for making further authenticated requests.
    """
    s = requests.session()
    #host = 'http://localhost:5000'
    # Fetch the login page first so we can scrape the CSRF token out of its
    # <meta> tag; the server rejects form POSTs that do not echo it back.
    # http://en.wikipedia.org/wiki/Cross-site_request_forgery
    # NOTE(review): verify=False disables TLS certificate verification,
    # which permits man-in-the-middle attacks -- confirm this is intended.
    r = s.get(host + '/login', verify=False)
    m = re.search(r'<meta content="([a-zA-Z0-9/=+]+)" name="csrf-token"', r.content)
    # This exactly mimics the POST request that happens when you log in.
    payload = {
        'authenticity_token': m.group(1),
        'email': email,
        'password': password,
        'commit': 'Log In',
        'utf8': u'✓',
    }
    r = s.post(host + '/sessions', data=payload, verify=False)
    return s

def download_reflections_pages():
'''downloads all reflection pages and stores them in a folder called html'''
Expand Down
93 changes: 59 additions & 34 deletions server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pymongo import Connection
import json
import os
from collections import defaultdict

app = Flask(__name__)
connection = Connection()
Expand All @@ -27,57 +28,81 @@ def get_icon():
#described above.

def get_JSON(name):
    """Build the keyword-match tree for `name` and return it as a JSON string.

    For every keyword the person has used, finds other people who used the
    same keyword and scores each of them by a symmetric usage ratio; the
    per-keyword matches become child nodes of the tree consumed by the
    frontend visualization.

    Parameters:
        name: the person's name, as stored in the Mongo collection.

    Returns:
        A JSON string with keys 'name', 'children' (one node per keyword,
        each holding per-person match nodes) and, when at least one match
        exists, 'top' (the best-matching person overall).
    """
    # debug
    print("got request for " + name)

    # The tree that will be converted to JSON and passed to the frontend.
    match_data = {}
    match_data['name'] = name
    match_data['children'] = []

    doc = collection.find_one({'name': name})
    my_kws = doc['keywords'].keys()

    # Per-person project similarity: the running sum of keyword-usage ratios.
    weight_sums = defaultdict(float)

    # For each keyword `name` has used.
    for kw in my_kws:
        kw_data = {}
        kw_data['name'] = kw
        kw_data['children'] = []

        # How many times `name` has used kw; used to compute the weight ratio.
        name_times = doc['keywords'][kw]

        # All the other people who have used kw, sorted by how many times
        # they've used it (descending).
        matches = collection.find({'keywords.' + kw: {'$exists': True}}).sort('keywords.' + kw, -1)

        # The [:10] slice limits how many people to check against, in order
        # to reduce visual clutter on the final page.
        match_names = []
        for match in matches[:10]:
            mn = match['name']
            if mn != name:
                match_names.append(mn)

        # Make a child node for each person who used the keyword.
        for person in match_names:
            num_kw_matches = collection.find_one({'name': person})['keywords'][kw]

            # Weight is the smaller of the two usage ratios, making the score
            # symmetric between the two people and capped at 1.0.
            # (1.0 * forces float division under Python 2.)
            ratio = min(1.0 * name_times / num_kw_matches,
                        1.0 * num_kw_matches / name_times)

            # Sum of ratios, per person.
            weight_sums[person] += ratio

            # How the person's name will appear on the page.
            person_data = {}
            person_data['name'] = (person +
                                   " (" +
                                   str(ratio)[:5] +
                                   ")")
            # Add this entry to the people for this keyword.
            kw_data['children'].append(person_data)

        # Add this keyword's data to the tree.
        match_data['children'].append(kw_data)

    # Select the person with the highest total score. Guard against the case
    # where nobody else shares any keyword: max() on an empty mapping would
    # raise ValueError and turn the request into a 500.
    if weight_sums:
        match_data['top'] = max(weight_sums, key=weight_sums.get)

    return json.dumps(match_data)

if __name__ == '__main__':
    # Bind to localhost only; the PORT environment variable overrides the
    # default port 80.
    port = int(os.environ.get('PORT', 80))
    app.run(host='127.0.0.1', port=port)