-
Notifications
You must be signed in to change notification settings - Fork 21
/
Copy pathelection_results.py
121 lines (99 loc) · 3.9 KB
/
election_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""
A monstrosity of an election results script. Calculates total votes for
races and candidates, and determines if there is a winner in each race.
This module bundles together way too much functionality and is near impossible
to test, beyond eye-balling results.
USAGE:
python election_results.py
OUTPUT:
summary_results.csv
"""
import csv
import urllib
from operator import itemgetter
from collections import defaultdict
from os.path import dirname, join
# Download the CSV of fake Virginia election results to the root of the
# project (one directory above the one containing this script).
# NOTE(review): `urllib.urlretrieve` and the binary 'rb' mode used for the csv
# module below are the Python 2 spellings -- confirm the target interpreter
# before porting this script.
url = "https://docs.google.com/spreadsheet/pub?key=0AhhC0IWaObRqdGFkUW1kUmp2ZlZjUjdTYV9lNFJ5RHc&output=csv"
filename = join(dirname(dirname(__file__)), 'fake_va_elec_results.csv')
urllib.urlretrieve(url, filename)

# Ingest the CSV as a list of dicts (one per row, keyed by the header names).
# The rows are read eagerly inside a `with` block so the file handle is closed
# deterministically instead of being left open for the life of the process;
# `reader` is still an iterable of row dicts for the clean-up loop that follows.
with open(filename, 'rb') as source:
    reader = list(csv.DictReader(source))

# Nested mapping filled in by the clean-up loop:
#   race key -> candidate key -> list of county-level result rows.
# defaultdict creates the inner dict automatically the first time a race key
# is looked up.
results = defaultdict(dict)
# Normalize every county-level row, then file it under its race and candidate.
for row in reader:
    # The raw candidate field is "Last, First"; keep both pieces, trimmed.
    name_parts = [part.strip() for part in row['candidate'].split(',')]
    row['last_name'], row['first_name'] = name_parts

    # Vote totals arrive as text; store them as integers so they can be summed.
    row['votes'] = int(row['votes'])

    # A race is identified by its office, with the district appended when the
    # row has one.
    district = row['district']
    race_key = (row['office'] + "-%s" % district) if district else row['office']

    # Prefix the raw candidate name with the party so that two candidates who
    # share the same name but belong to different parties get distinct keys.
    cand_key = "-".join([row['party'], row['candidate']])

    # `results[race_key]` yields the per-race dict (created on first access);
    # setdefault starts the candidate's row list the first time it is seen.
    results[race_key].setdefault(cand_key, []).append(row)
# Tally votes for races and candidates and assign winners.
# `summary` maps each race key to a dict holding the race metadata, the
# race-wide vote total, and the candidates sorted from most to fewest votes.
summary = defaultdict(dict)
for race_key, cand_results in results.items():
    all_votes = 0
    cands = []
    # The per-candidate rows are bound to `cand_rows` rather than `results`,
    # so the mapping being iterated by the outer loop is never rebound.
    for cand_key, cand_rows in cand_results.items():
        # Name and party are identical on every county row for a candidate,
        # so read them from the first one.
        first_row = cand_rows[0]
        # Candidate total is the sum of the county-level vote counts
        cand_total_votes = sum(county['votes'] for county in cand_rows)
        cands.append({
            'first_name': first_row['first_name'],
            'last_name': first_row['last_name'],
            'party': first_row['party'],
            'votes': cand_total_votes,
            'winner': '',
        })
        # Add the candidate total to the race-wide vote count
        all_votes += cand_total_votes

    # Sort candidates from highest to lowest vote count
    sorted_cands = sorted(cands, key=itemgetter('votes'), reverse=True)

    # Determine the winner, if any. An unopposed candidate wins outright
    # (previously a single-candidate race raised IndexError when looking up
    # the runner-up); otherwise the leader wins unless tied with the runner-up.
    first = sorted_cands[0]
    if len(sorted_cands) == 1 or first['votes'] != sorted_cands[1]['votes']:
        first['winner'] = 'X'

    # Get race metadata from one set of results (the first row of the first
    # candidate in iteration order); next(iter(...)) avoids indexing the
    # return value of .values(), which is only a list on Python 2.
    result = next(iter(cand_results.values()))[0]

    # Add results to output
    summary[race_key] = {
        'date': result['date'],
        'office': result['office'],
        'district': result['district'],
        'all_votes': all_votes,
        'candidates': sorted_cands,
    }
# Write the summary to summary_results.csv, next to this script: one output
# row per candidate, each repeating the metadata and vote total of its race.
outfile = join(dirname(__file__), 'summary_results.csv')
# NOTE(review): binary 'wb' mode is what the csv module expects on Python 2 --
# confirm the target interpreter before porting.
with open(outfile, 'wb') as fh:
    # We'll limit the output to cleanly parsed, standardized values
    fieldnames = [
        'date',
        'office',
        'district',
        'last_name',
        'first_name',
        'party',
        'all_votes',
        'votes',
        'winner',
    ]
    # extrasaction='ignore' silently drops any key not listed in fieldnames
    # (here, the nested 'candidates' list carried over from the race dict).
    writer = csv.DictWriter(fh, fieldnames, extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
    writer.writeheader()
    for race_summary in summary.values():
        for cand in race_summary['candidates']:
            # Build each output row in a fresh dict so the entries of
            # `summary` are left untouched: nothing is popped from them and no
            # candidate fields are merged into them while writing.
            row = dict(race_summary)
            row.update(cand)
            writer.writerow(row)