-
Notifications
You must be signed in to change notification settings - Fork 0
/
intmaker.py
45 lines (37 loc) · 1.24 KB
/
intmaker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import sys
import csv
# Raise the csv module's per-field size cap as high as the platform allows so
# very large cells can be read. sys.maxsize does not fit in a C long on every
# platform (csv.field_size_limit then raises OverflowError), so halve the
# requested limit until the call is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 2
def build_hash_maps(data):
    """Assign every distinct value of each column a zero-based integer code.

    Codes are handed out per column position, in order of first appearance
    while scanning the rows top to bottom.

    Args:
        data: Iterable of rows; each row is an iterable of hashable values.

    Returns:
        A dict mapping column index to a dict of ``{value: code}``.
    """
    codes_by_column = {}
    for record in data:
        for position, cell in enumerate(record):
            column_codes = codes_by_column.setdefault(position, {})
            # An already-seen value keeps its code; a new one receives the
            # next unused integer for this column.
            column_codes.setdefault(cell, len(column_codes))
    return codes_by_column
def replace_with_index(data, hash_maps):
    """Overwrite each cell of ``data`` in place with its integer code.

    Args:
        data: Sequence of mutable rows; modified in place.
        hash_maps: Dict mapping column index to a ``{value: code}`` dict that
            contains every value occurring in that column of ``data``.

    Raises:
        KeyError: If a column index or a cell value has no entry in
            ``hash_maps``.
    """
    for record in data:
        for position, cell in enumerate(record):
            column_codes = hash_maps[position]
            record[position] = column_codes[cell]
def main(delimiter):
    """Integer-encode a delimited table read from stdin and write it to stdout.

    The first row is treated as the header and is written back unchanged.
    Every remaining cell is replaced by the code that ``build_hash_maps``
    assigns to its value within its column. If stdin contains no rows at all,
    nothing is written.

    Args:
        delimiter: Field separator used for both reading and writing.
    """
    input_csv = csv.reader(sys.stdin, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL)

    header = next(input_csv, None)
    if header is None:
        # Empty input: there is no header to echo, and writerow(None) would
        # raise csv.Error, so emit nothing.
        return

    # All data rows are kept in memory because codes are assigned in one pass
    # and applied in a second.
    data = list(input_csv)
    hash_maps = build_hash_maps(data)
    replace_with_index(data, hash_maps)

    output_csv = csv.writer(sys.stdout, delimiter=delimiter, quoting=csv.QUOTE_MINIMAL)
    output_csv.writerow(header)
    output_csv.writerows(data)
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the field delimiter.
    if len(sys.argv) != 2:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # which keeps stdout reserved for the encoded table.
        sys.exit("Usage: python script.py <delimiter>")
    delimiter = sys.argv[1]
    main(delimiter)