-
Notifications
You must be signed in to change notification settings - Fork 2
/
hitchwiki.py
56 lines (42 loc) · 2.28 KB
/
hitchwiki.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
import numpy as np
import folium
import json
import folium.plugins
import sqlite3
from html import unescape
import os
import datetime
to_df = lambda x: pd.DataFrame(x.tolist(), index=x.index)
good = pd.read_json('good.json')[0]
good = good.str.split('\n').str[-1]
df = pd.DataFrame(good.apply(json.loads).tolist())
df = df[df.rating != '0']
df['country'] = df.location.apply(lambda x: x['country']['iso'])
df['wait'] = df.waiting_stats.apply(lambda x: x['avg'] if x else None).astype(float)
df = df.assign(**to_df(df.user.dropna()))
df['spot_datetime'] = df.datetime
df['spot_name'] = df.name
# df.loc[df.comments_count==0, 'comments'] = None
explode_df = df.explode('comments').reset_index()
# explode_df.assign(**explode_df.comments.dropna().apply(pd.Series))
explode_df = explode_df.assign(**to_df(explode_df.comments.dropna()))
explode_df = explode_df.assign(**to_df(explode_df.user.dropna()))
# comment_df.datetime = pd.to_datetime(comment_df.datetime)
explode_df[['lat', 'lon', 'rating']] = explode_df[['lat', 'lon', 'rating']].astype(float)
explode_df.rating = 6 - explode_df.rating
explode_df.comment = explode_df.comment.dropna().apply(unescape)
explode_df.loc[explode_df.comment.isnull(), 'datetime'] = explode_df.spot_datetime
explode_df.loc[explode_df.comment.isnull(), 'name'] = explode_df.spot_name
explode_df.name = explode_df.name + ' (Hitchwiki)'
explode_df.datetime += '.000000'
explode_df['reviewed'] = True
explode_df['banned'] = explode_df.comment.isin(['Got a ride from the Pope of Dope.', 'Whoever added this spot is genius xD', 'After 4 hours a camel finally picked me up, he was very friendly.'])
explode_df['ip'] = None
explode_df['dest_lat'] = explode_df['dest_lon'] = np.nan
# so sqlite understands they are floats
explode_df.loc[explode_df.comment=='Got a ride after 10 minutes withhout even bothering straight to Lure close to Switzerland which was where I needed to go. Lovely', ['dest_lat', 'dest_lon']] = [47.6864, 6.4943]
cols = ['lat', 'lon', 'rating', 'country', 'wait', 'name', 'comment', 'datetime',
'reviewed', 'banned', 'ip', 'dest_lat', 'dest_lon']
explode_df[cols].to_sql('points', sqlite3.connect('points.sqlite'), index_label='id', if_exists='replace')
# create unique index unique_comment on points(lat, lon, comment) where comment is not null;