forked from mpayson/esri-partner-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcool_utils.py
125 lines (97 loc) · 3.34 KB
/
cool_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""Functions that I want to remember and hopefully you will too!"""
import functools
import glob
import json
import os

import pandas as pd
def chunk(row, n=1000):
    """Yield consecutive slices of a sliceable object, n items at a time.

    Useful for splitting one large request into batches, such as for
    Esri's geocoder. The last slice holds whatever is left over and may be
    shorter than n.
    args:
        row - the object to be chunked (needs len() and slicing)
        n - chunk size
    """
    starts = range(0, len(row), n)
    for lo in starts:
        hi = lo + n
        yield row[lo:hi]
def chunk_df(df, n=1000):
    """Yield a dataframe in consecutive row blocks of at most n rows.

    Each block is an independent copy, so changing a yielded block does not
    touch the dataframe that was passed in.
    args:
        df - the dataframe to be chunked
        n - chunk size (number of rows per block)
    """
    for lo in range(0, len(df), n):
        piece = df.iloc[lo:lo + n]
        yield piece.copy()
def csvs_to_df(dir_path):
    """concats all csvs in a directory to one dataframe

    Zero-byte files are skipped before parsing. Files are read in sorted
    path order so the row order of the result is reproducible (glob makes
    no promise about the order of its matches).
    args:
        dir_path - path to dir containing csvs
    returns:
        one dataframe holding the rows of every non-empty csv, each file
        keeping its own index; an empty dataframe when the directory has
        no non-empty csv
    """
    csv_paths = sorted(glob.glob(os.path.join(dir_path, '*.csv')))
    frames = [pd.read_csv(p) for p in csv_paths if os.stat(p).st_size > 0]
    if not frames:
        # pd.concat raises ValueError when given nothing to concatenate
        return pd.DataFrame()
    return pd.concat(frames)
def extract(obj, keys, **kwargs):
    """returns a nested object value for the specified keys

    Walks obj one key at a time (dict keys or sequence indexes). As soon as
    a step cannot be taken the walk stops and the default is returned.
    args:
        obj -- the nested object containing the desired value
        keys -- list of keys to drill through object
    kwargs:
        required -- when true and no default is given, a missing key raises
                    KeyError instead of returning (default False)
        default -- value returned when a key is missing (default None)
        warn -- when true, print a warning naming the missing key
                (default False)
    returns:
        the value found at the end of the key path, or default
    raises:
        KeyError -- a key is missing, required is true and default is None
    """
    required = kwargs.pop('required', False)
    default = kwargs.pop('default', None)
    warn = kwargs.pop('warn', False)
    current = obj
    for key in keys:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # TypeError: the path runs through something that cannot be
            # indexed with this key (None, a number, a list given a string
            # key, ...). For the caller that is just another missing key.
            if warn:
                print('Warning key does not exist. Key: {0} in Keys: {1}'.format(key, keys))
            # identity check: `== None` misbehaves for defaults that
            # overload equality (arrays, dataframes)
            if required and default is None:
                raise KeyError('Required key does not exist in object and no default')
            return default
    return current
def d_extract(obj, keys_delimited, **kwargs):
    """returns a nested object value for a '.' delimited key path

    Splits the path on '.' and hands the resulting list of keys to extract,
    passing any keyword options straight through.
    args:
        obj -- the nested object containing the desired value
        keys_delimited -- a '.' delimited string of keys to drill through
    """
    return extract(obj, keys_delimited.split('.'), **kwargs)
def read_json(path):
    """Load a JSON file and return its parsed content

    A path that does not exist is not an error: an empty dictionary is
    returned instead.
    args:
        path - path to the JSON file
    """
    if not os.path.exists(path):
        return {}
    with open(path, 'r') as handle:
        return json.load(handle)
def write_json(path, obj):
    """Write a dictionary to a file as JSON

    The object is serialized in memory before the file is opened. If it
    cannot be serialized the error is raised first, so a file that already
    exists at path is neither truncated nor left half written.
    args:
        path -- path to the output JSON file
        obj -- dictionary to write
    raises:
        TypeError -- obj holds a value json cannot serialize
        ValueError -- obj contains a circular reference
    """
    payload = json.dumps(obj)
    with open(path, 'w') as f:
        f.write(payload)
def memoize(f):
    """Decorator to memoize function calls that receive a list
    as their first argument, useful to avoid expensive operations

    The decorated function is only called with the items that have no cached
    result yet (duplicates removed), and must return one result per item, in
    the same order. Results are cached per item key only: extra positional
    and keyword arguments are forwarded to f but do not take part in the
    cache lookup.
    args:
        f -- the function to memoize
    keyword arguments accepted by the decorated function (not passed to f):
        cache_path -- optional path of a JSON file; its content is merged
                      into the cache before the lookup and the cache is
                      written back to it afterwards
        get_key -- function mapping an item to its cache key (default str)
    returns:
        a wrapper returning a list with one result per input item, in input
        order
    """
    cache = {}

    @functools.wraps(f)
    def execute(input_list, *args, **kwargs):
        cache_path = kwargs.pop('cache_path', None)
        get_key = kwargs.pop('get_key', str)
        if cache_path:
            cache.update(read_json(cache_path))

        # Materialize once so one-shot iterables work, and compute each
        # item's key a single time.
        items = list(input_list)
        item_keys = [get_key(item) for item in items]

        seen = set()
        pending_items = []
        pending_keys = []
        for item, key in zip(items, item_keys):
            if key not in cache and key not in seen:
                seen.add(key)
                pending_items.append(item)
                pending_keys.append(key)

        if pending_items:
            results = f(pending_items, *args, **kwargs)
            for idx, key in enumerate(pending_keys):
                cache[key] = results[idx]

        # Written on every call that names a cache file, so the file also
        # receives entries cached by earlier calls made without cache_path.
        if cache_path:
            write_json(cache_path, cache)
        return [cache[key] for key in item_keys]

    return execute