-
Notifications
You must be signed in to change notification settings - Fork 0
/
add_ethnicity.py
54 lines (45 loc) · 1.95 KB
/
add_ethnicity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os
from urllib.parse import quote

import pandas as pd
import requests
from pandarallel import pandarallel
from tqdm import tqdm
# Name-Prism API token used to build request URLs.
# Prefer supplying it through the NAME_PRISM_API_KEY environment variable; the
# literal below is kept only as a backward-compatible fallback.
# NOTE(review): a credential committed to source control should be treated as
# exposed -- consider rotating it and removing the fallback.
API_KEY = os.environ.get("NAME_PRISM_API_KEY", "f9832b6322bc14f4")
def add_ethnicity(f, new_name):
    """Add an ``est_ethnicity`` column to a CSV and write the result.

    Reads the CSV at ``f``, calls ``make_request`` on the ``FName`` and
    ``LName`` values of every row (in parallel via pandarallel), drops the
    rows whose estimate is null, and writes the remaining rows to
    ``new_name``.  The frame shape is printed before and after the drop.

    Args:
        f: Path of the input CSV; it must contain ``FName`` and ``LName``.
        new_name: Path the annotated CSV is written to.
    """
    # Hook tqdm into pandas and start pandarallel with its progress bar on.
    tqdm.pandas()
    pandarallel.initialize(progress_bar=True)

    frame = pd.read_csv(f)

    def estimate_for_row(row):
        return make_request(row['FName'], row['LName'])

    frame["est_ethnicity"] = frame.parallel_apply(estimate_for_row, axis=1)
    print("df size before dropping null ethnicity", frame.shape)

    # Keep only the rows for which the lookup produced a value.
    has_estimate = frame['est_ethnicity'].notnull()
    frame = frame[has_estimate]
    print("df size after dropping null ethnicity", frame.shape)

    frame.to_csv(new_name)
def make_request(first_name, last_name, timeout=30):
    """Look up the highest-scoring ethnicity for a name via the Name-Prism API.

    Args:
        first_name: Given name; must be a string.
        last_name: Family name; must be a string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The key with the largest positive score in the JSON response, ``""``
        when no score is positive, or ``None`` when either name is not a
        string or the lookup fails (network/HTTP error, invalid JSON, or a
        payload that is not a mapping of labels to numeric scores).
    """
    # Non-string names (e.g. NaN for an empty CSV cell) cannot be looked up.
    if not isinstance(first_name, str) or not isinstance(last_name, str):
        return None

    try:
        # Percent-encode the name so characters such as "/", "?" or "#" cannot
        # alter the request path; the separating space becomes "%20".
        full_name = quote(first_name + " " + last_name, safe="")
        url = "http://www.name-prism.com/api_token/eth/json/" + API_KEY + "/" + full_name
        # A timeout keeps one stalled connection from blocking a worker forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Best-effort lookup: a failed request or an unparsable body yields
        # None, and the caller drops that row.
        return None

    if not isinstance(data, dict):
        return None

    best_label = ""
    best_score = 0
    try:
        # Strict ">" keeps the first label seen when several scores tie.
        for label, score in data.items():
            if score > best_score:
                best_label, best_score = label, score
    except TypeError:
        # Non-numeric values (e.g. an error-message payload) cannot be ranked.
        return None
    return best_label
if __name__ == '__main__':
    # Paths for the voucher data sets (only used by the disabled jobs below).
    merged_2019 = "results/2019_vouchers_voter_file_merged.csv"
    new_2019_name = "results/2019_vouchers_voter_file_ethnicity.csv"
    merged_2017 = "results/2017_vouchers_voter_file_merged.csv"
    new_2017_name = "results/2017_vouchers_voter_file_ethnicity.csv"

    # Each job is (progress message, input CSV, output CSV).  The voucher jobs
    # are disabled; the shapes noted next to the jobs were recorded in the
    # original script's comments from earlier runs.
    jobs = [
        # ("add ethnicity for 2019 vouchers", merged_2019, new_2019_name),
        #   df size before dropping null ethnicity (10465, 51)
        #   df size after dropping null ethnicity (10463, 51)
        # ("add ethnicity for 2017 vouchers", merged_2017, new_2017_name),
        #   no change: df size after dropping null ethnicity (22534, 68)
        (
            "add ethnicity for 2019 cash",
            "results/2019_cash_merged.csv",
            "results/2019_cash_ethnicity.csv",
        ),
        #   no change: df size after dropping null ethnicity (6602, 68)
        (
            "add ethnicity for 2017 cash",
            "results/2017_cash_merged.csv",
            "results/2017_cash_ethnicity.csv",
        ),
    ]

    for message, source_csv, target_csv in jobs:
        print(message)
        add_ethnicity(source_csv, target_csv)