-
Notifications
You must be signed in to change notification settings - Fork 1
/
process.py
executable file
·123 lines (89 loc) · 3.4 KB
/
process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python3
from sys import exit
from glob import glob
from datetime import datetime
from random import getrandbits
from tempfile import gettempdir
from os import listdir, mkdir, makedirs, path, system, unlink
from shutil import copy, copytree, make_archive, rmtree
def _clear_directory(dir):
    """Delete every file beneath ``dir`` while keeping the directory tree.

    Sub-directories are emptied recursively but are not removed themselves,
    and any entry named ``.gitkeep`` is left in place. Symbolic links are
    removed as links and never followed, so files that live outside ``dir``
    are not touched. A failure on an individual entry is printed and the
    remaining entries are still processed.

    Args:
        dir: Path of the directory to empty. The name shadows the builtin
            but is kept so existing callers are unaffected.

    Raises:
        OSError: If ``dir`` itself cannot be listed by the outermost call.
    """
    for entry in listdir(dir):
        if entry == '.gitkeep':
            continue
        entry_path = path.join(dir, entry)
        try:
            # Check for links first: isfile()/isdir() follow symlinks, and
            # recursing through a linked directory would delete files that
            # live outside the tree being cleared.
            if path.islink(entry_path) or path.isfile(entry_path):
                unlink(entry_path)
            elif path.isdir(entry_path):
                _clear_directory(entry_path)
        except OSError as e:
            # Best effort: report the problem and carry on with the rest.
            print(e)
def cwd():
    """Return the directory that contains this script file.

    The script path is passed through ``realpath`` before its directory part
    is taken, so symlinks in the path are resolved first.
    """
    script_location = path.realpath(__file__)
    return path.dirname(script_location)
# Define run steps for each processing component
def clear_directories():
    """Clear the cleaner and geolocator working folders under ``volumes``.

    The folders are processed in a fixed order: ``cleaner/csv``,
    ``cleaner/output``, ``geolocator/data`` and ``geolocator/output``.
    """
    volumes_root = path.join(cwd(), 'volumes')
    targets = (
        ('cleaner', 'csv'),
        ('cleaner', 'output'),
        ('geolocator', 'data'),
        ('geolocator', 'output'),
    )
    for component, folder in targets:
        _clear_directory(path.join(volumes_root, component, folder))
def run_mapper():
    """Run the ``mapper`` service through ``docker-compose up``.

    Prints a failure message and exits the process with status 1 when the
    command returns a non-zero status.
    """
    print("[--] Running Mapper")
    status = system('docker-compose up mapper')
    if status == 0:
        return
    print('[!!] Mapper failed')
    exit(1)
def run_cleaner():
    """Run the cleaner container and stage its output for the geolocator.

    Starts the ``cleaner`` service with ``docker-compose up`` and then copies
    the first file matching ``*_listings_unique.csv`` in
    ``volumes/cleaner/output`` into ``volumes/geolocator/data``.

    Exits the process with status 1 when the container command returns a
    non-zero status, when no matching listings file exists, or when the copy
    fails.
    """
    print("[--] Running Cleaner")
    if system('docker-compose up cleaner') != 0:
        print('[!!] Cleaner failed')
        exit(1)

    print("[--] Preparing cleaned data for geolocation")
    src_dir = path.join(cwd(), 'volumes', 'cleaner', 'output')
    matches = glob(path.join(src_dir, '*_listings_unique.csv'))
    if not matches:
        # Fail with an explicit reason rather than carrying on without a
        # listings file for the geolocator to work on.
        print("[!!] Could not copy cleaned listings for geolocator")
        print("No *_listings_unique.csv file found in %s" % src_dir)
        exit(1)

    dest = path.join(cwd(), 'volumes', 'geolocator', 'data')
    try:
        copy(matches[0], dest)
    except OSError as e:
        print("[!!] Could not copy cleaned listings for geolocator")
        print(e)
        exit(1)
def _read_env_file(env_path):
    """Parse ``NAME=value`` lines from ``env_path`` into a dict of strings.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Only the first ``=`` separates name from value, so values may
    themselves contain ``=``. Names and values are stripped of surrounding
    whitespace.
    """
    env = {}
    with open(env_path) as fd:
        for line in fd:
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue
            name, sep, value = stripped.partition('=')
            if sep:
                env[name.strip()] = value.strip()
    return env


def run_geolocator():
    """Run the geolocator container and bundle all output into a zip file.

    Starts the ``geolocator`` service with ``docker-compose up``, reads
    ``.env.mapper`` next to this script to build the archive name
    (``rental-listings_<MONTH>-<YEAR>`` when ``MAPPER_MONTH`` is set,
    otherwise ``rental-listings_Q<QUARTER>-<YEAR>``), copies the cleaner and
    geolocator output folders into a temporary directory and zips that
    directory into ``files/<archive_name>.zip``.

    Exits the process with status 1 when the container command returns a
    non-zero status or the temporary work directory cannot be created.

    Raises:
        KeyError: If ``.env.mapper`` lacks ``MAPPER_YEAR``, or lacks both
            ``MAPPER_MONTH`` and ``MAPPER_QUARTER``.
    """
    print("[--] Running Geolocator")
    if system('docker-compose up geolocator') != 0:
        print('[!!] Geolocator failed')
        exit(1)

    print("[--] Bundling output")
    env = _read_env_file(path.join(cwd(), ".env.mapper"))

    if 'MAPPER_MONTH' in env:
        archive_name = 'rental-listings_%s-%s' % (env['MAPPER_MONTH'], env['MAPPER_YEAR'])
    else:
        archive_name = 'rental-listings_Q%s-%s' % (env['MAPPER_QUARTER'], env['MAPPER_YEAR'])

    tempdir = path.join(gettempdir(), 'rla-out-%0x' % getrandbits(40))
    workdir = path.join(tempdir, archive_name)
    workdir_cleaner = path.join(workdir, 'cleaner')
    workdir_geolocator = path.join(workdir, 'geolocator')

    try:
        makedirs(workdir, 0o700, exist_ok=True)
    except OSError:
        print("[!!] Could not create %s. You will have to bundle your own output." % workdir)
        exit(1)

    try:
        copytree(path.join(cwd(), 'volumes', 'cleaner', 'output'), workdir_cleaner)
        copytree(path.join(cwd(), 'volumes', 'geolocator', 'output'), workdir_geolocator)
        # Zip from tempdir so the archive has a single top-level folder
        # named after the archive.
        make_archive(path.join(cwd(), "files", archive_name), 'zip', tempdir)
    finally:
        # Remove the staging area even when copying or zipping failed.
        try:
            rmtree(tempdir)
        except OSError:
            print("[!!] Could not delete tempdir %s. Restarting machine will clear all files in temp directory." % tempdir)
# Run processor: invoke every pipeline stage in sequence, then report completion
for stage in (clear_directories, run_mapper, run_cleaner, run_geolocator):
    stage()
print("[--] Rental Listings Processed")