-
Notifications
You must be signed in to change notification settings - Fork 0
/
map
executable file
·64 lines (54 loc) · 2.34 KB
/
map
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python3
import json
import sqlalchemy
import pandas as pd
from sys import exit
from os import environ, path
from datetime import datetime
from datetime import date
from dateutil.relativedelta import *
def longmap(record):
if 'lng' in record:
return float(record['lng'])
else:
return float(record['Longitude'])
def latmap(record):
if 'lat' in record:
return float(record['lat'])
else:
return float(record['Latitude'])
if 'MAPPER_YEAR' in environ:
YEAR = int(environ['MAPPER_YEAR'])
else:
print("Using current year for YEAR")
YEAR = datetime.now().strftime('%Y')
DATE_RANGES = {
1: '[{year}-01-01, {year}-03-31]'.format(year=YEAR),
2: '[{year}-04-01, {year}-06-30]'.format(year=YEAR),
3: '[{year}-07-01, {year}-09-30]'.format(year=YEAR),
4: '[{year}-10-01, {year}-12-31]'.format(year=YEAR)
}
if 'MAPPER_QUARTER' in environ and int(environ['MAPPER_QUARTER']) in [1, 2, 3, 4]:
RANGE = DATE_RANGES[int(environ['MAPPER_QUARTER'])]
elif 'MAPPER_MONTH' in environ:
RANGE = '[{year}-{month}-01, {next_month}-01)'.format(
year=YEAR,
month=int(environ['MAPPER_MONTH']),
next_month=(date(YEAR, int(environ['MAPPER_MONTH']), 1) + relativedelta(months=+1)).strftime("%Y-%m")
)
else:
print("Getting data from last month. Otherwise you must set MAPPER_QUARTER in the environment, as 1, 2, 3, or 4")
RANGE = '[{year}-{month}-01, {year}-{next_month}-01)'.format(
year=YEAR,
month=int(datetime.now().strftime('%m'))-1,
next_month=datetime.now().strftime('%m')
)
engine = sqlalchemy.create_engine('postgresql://{}:{}@{}:{}/{}'.format(environ['DB_USER'], environ['DB_PASSWORD'], environ['DB_HOST'], environ['DB_PORT'], environ['DB_NAME']))
df = pd.read_sql_query(sqlalchemy.text('SELECT * FROM listings WHERE \'{range}\'::tsrange @> last_seen'.format(range=RANGE)), engine)
df.rename(columns={'posting_date': 'post_at'}, inplace=True)
df = df.dropna(subset=['payload'])
df['longitude'] = df['payload'].apply(longmap)
df['latitude'] = df['payload'].apply(latmap)
df = df[df['latitude'] > 37]
mapped = df[['uid', 'ask', 'bedrooms', 'title', 'address', 'post_at', 'created_at', 'updated_at', 'source_id', 'survey_id', 'latitude', 'longitude']]
mapped.to_csv(path.join(environ['OUT_FILE_PATH'], environ['OUT_FILE_NAME']), index=False, header=False)