-
Notifications
You must be signed in to change notification settings - Fork 1
/
subset-join.py
executable file
·37 lines (30 loc) · 1.16 KB
/
subset-join.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python
# for doing a join on files that you know are sorted the same way and one is a
# subset of the other
import sys
import argparse
# ----- command line parsing -----
def build_parser():
    """Return the argument parser for the two positional input files."""
    parser = argparse.ArgumentParser(
        description="Does a join on column 1 for files where one is a subset "
                    "of the other and they are sorted the same way.")
    parser.add_argument("big_file", type=str,
                        help="Big file (superset).")
    parser.add_argument("little_file", type=str,
                        help="Little file (subset).")
    return parser
# ----- end command line parsing -----


def subset_join(big, little, out=None, progress=None):
    """Join two line sources on their first whitespace-separated column.

    Both inputs are scanned exactly once, front to back: for each line of
    ``little``, ``big`` is advanced until a line with the same first column
    is found. This only works when every key of ``little`` occurs in ``big``
    and both inputs list their keys in the same order.

    For every match one line is written to ``out``: the key, the remaining
    columns of the big line, then the remaining columns of the little line,
    all tab-separated.

    Args:
        big: iterable of text lines (superset), e.g. an open file.
        little: iterable of text lines (subset), e.g. an open file.
        out: object with ``write``; receives joined rows. Defaults to
            ``sys.stdout``.
        progress: object with ``write``; receives the running count of big
            lines read and any warning. Defaults to ``sys.stderr``.

    Returns:
        The number of lines consumed from ``big``.
    """
    out = sys.stdout if out is None else out
    progress = sys.stderr if progress is None else progress

    # One shared iterator: each little key resumes scanning where the
    # previous match stopped, even if ``big`` is a list and not a file.
    big_lines = iter(big)
    n = 0
    for little_line in little:
        little_split = little_line.split()
        if not little_split:
            # Blank line: there is no key to look up.
            continue
        key = little_split[0]
        for big_line in big_lines:
            n += 1
            progress.write("\r{:d} read".format(n))
            big_split = big_line.split()
            if not big_split:
                # Blank line in the big file: counted as read, never a match.
                continue
            if big_split[0] == key:
                out.write("{:s}\t{:s}\t{:s}\n".format(
                    big_split[0],
                    "\t".join(big_split[1:]),
                    "\t".join(little_split[1:])))
                break
        else:
            # The big file ran out without a match, so no later little line
            # can match either. Report it instead of silently dropping rows.
            progress.write(
                "\nWarning: key '{:s}' was not found in the remainder of the "
                "big file; stopping.".format(key))
            break
    progress.write("\n")
    return n


def main(argv=None):
    """Parse the command line, then join the two files onto stdout."""
    args = build_parser().parse_args(argv)
    # Context managers make sure both files are closed, even on error.
    with open(args.big_file) as big, open(args.little_file) as little:
        subset_join(big, little)


if __name__ == "__main__":
    main()