-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplitbycol
executable file
·75 lines (64 loc) · 1.76 KB
/
splitbycol
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python
import sys
import csv
import getopt
import os
import math
import gzip
import shutil
def _column_groups(width, n, in_all):
    """Return, for each of the n output files, the sorted column indices it gets.

    Columns are cut into consecutive runs of ``m`` indices, where
    ``m = max(ceil(width / n), n)``; every run is then extended with the
    indices in ``in_all``.  Indices >= ``width`` are dropped.
    """
    # NOTE(review): the lower bound of n on the run length is kept from the
    # original code; for narrow inputs it can leave trailing groups with only
    # the in_all columns -- confirm this is intended.
    m = int(max(math.ceil(float(width) / n), n))
    groups = []
    for i in range(n):
        wanted = in_all.union(range(i * m, (i + 1) * m))
        # Sort explicitly: set iteration order is not defined, and the output
        # columns should keep their input order.
        groups.append(sorted(j for j in wanted if j < width))
    return groups


def run(inp, out, n, gz_in, gz_out, force, in_all):
    """Split a CSV file column-wise into n CSV files inside directory ``out``.

    Output files are named ``columns-0`` .. ``columns-<n-1>`` (with a ``.gz``
    suffix when ``gz_out`` is true).  The column layout is computed from the
    width of the first row and reused for every following row.

    Args:
        inp: path of the input CSV, or ``'-'`` to read from standard input.
        out: output directory; it is created by this call.
        n: number of output files.
        gz_in: read ``inp`` through gzip when true.
        gz_out: write gzip-compressed output files when true.
        force: when true and ``out`` already exists, delete it first.
        in_all: set of zero-based column indices copied into every output file.

    Raises:
        OSError: ``out`` already exists and ``force`` is false.
        IndexError: a row is too short for the layout derived from the
            first row.
    """
    # The csv module wants binary files on Python 2 and newline='' text files
    # on Python 3, so the open modes depend on the interpreter.
    py3 = sys.version_info[0] >= 3

    def open_in(path):
        opener = gzip.open if gz_in else open
        if py3:
            return opener(path, 'rt', newline='')
        return opener(path)

    def open_out(path):
        if gz_out:
            path += ".gz"
        opener = gzip.open if gz_out else open
        if py3:
            return opener(path, 'wt', newline='')
        return opener(path, 'wb')

    if os.path.exists(out) and force:
        shutil.rmtree(out)
    os.makedirs(out)

    handles = []
    try:
        # Open inside the try block so that a failure part-way through still
        # closes the files that were opened successfully.
        for i in range(n):
            handles.append(open_out(os.path.join(out, 'columns-%d' % i)))
        writers = [csv.writer(h) for h in handles]

        with (sys.stdin if inp == '-' else open_in(inp)) as src:
            groups = None
            for row in csv.reader(src, delimiter=',', quotechar='"'):
                if groups is None:
                    groups = _column_groups(len(row), n, in_all)
                for writer, idxs in zip(writers, groups):
                    writer.writerow([row[j] for j in idxs])
    finally:
        for h in handles:
            h.close()
def parse_args(argv):
    """Parse command-line options into the argument tuple taken by run().

    Options:
        -i PATH   input CSV (default ``-``, i.e. standard input)
        -o DIR    output directory (required)
        -n N      number of output files (required, >= 1)
        -Z        input is gzip-compressed
        -z        gzip-compress the output files
        -f        delete the output directory first if it exists
        -a LIST   comma-separated 1-based column numbers to copy into every
                  output file

    Returns:
        ``(inp, out, n, gz_in, gz_out, force, in_all)`` where ``in_all`` is a
        set of zero-based column indices.

    Raises:
        getopt.GetoptError: unknown option or missing option argument.
        ValueError: a required option is missing or a value is invalid.
    """
    optlist, _args = getopt.getopt(argv, 'i:o:zn:fa:Z')
    inp = '-'
    out = None
    n = None
    gz_in = False
    gz_out = False
    force = False
    in_all = set()
    for k, v in optlist:
        if k == '-i':
            inp = v
        elif k == '-o':
            out = v
        elif k == '-Z':
            gz_in = True
        elif k == '-z':
            gz_out = True
        elif k == '-n':
            n = int(v)
            if n < 1:
                raise ValueError("-n must be >= 1")
        elif k == '-f':
            force = True
        elif k == '-a':
            # Column numbers are 1-based on the command line and stored
            # zero-based.
            in_all = set(int(x.strip()) - 1 for x in v.split(","))
            if min(in_all) < 0:
                raise ValueError("-a values must be >= 1")
        else:
            # getopt only yields options from the spec above, so this is a
            # guard against the spec and the branches drifting apart.
            raise AssertionError("unhandled option " + k)
    if out is None:
        raise ValueError("-o <output directory> is required")
    if n is None:
        raise ValueError("-n <number of output files> is required")
    return inp, out, n, gz_in, gz_out, force, in_all


if __name__ == '__main__':
    try:
        parsed = parse_args(sys.argv[1:])
    except (getopt.GetoptError, ValueError) as err:
        # Report bad usage as a one-line message instead of a traceback.
        sys.exit('%s: %s' % (os.path.basename(sys.argv[0]), err))
    run(*parsed)