forked from gringer/bioinfscripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gtsubset.pl
executable file
·86 lines (74 loc) · 1.67 KB
/
gtsubset.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/perl
# gtsubset.pl -- subsets a simplegt file using an individual list
use warnings;
use strict;
use FileHandle;
use Getopt::Long qw(:config auto_version auto_help pass_through);
my $idFileName = "";
my %ids = ();
GetOptions(
'ids=s' => \$idFileName,
);
my @fileArgs = ();
foreach my $arg (@ARGV){
if(-f $arg){
$idFileName = $arg;
} else {
$ids{$arg} = 1;
}
}
@ARGV = @fileArgs;
if($idFileName){
print(STDERR "Reading in id file... ");
open(my $idFile, "< $idFileName")
or die("cannot open $idFileName for reading");
while(<$idFile>){
chomp;
s/(\s|,).*$//;
$ids{$_} = 1;
}
close($idFile);
print(STDERR "done!\n");
}
if(!%ids){
print(STDERR "Error: no IDs specified. Cannot continue.\n");
exit(1);
}
my %filteredIDs = ();
my @idOrder = ();
my @colOrder = ();
while(<>){
chomp;
if(/^#/){
s/<Individual.*?://g;
s/##//g;
s/>//g;
s/^\s+//;
s/\s+$//;
my @origIDs = split(/\s+/);
for(my $i = 0; $i <= $#origIDs; $i++){
if($ids{$origIDs[$i]}){
$filteredIDs{$origIDs[$i]} = $i;
push(@idOrder, $origIDs[$i]);
push(@colOrder, $i);
}
}
if(!(@idOrder)){
print(STDERR "Error: no specified IDs found. Cannot continue.\n");
exit(1);
}
if(scalar(@idOrder) < scalar(keys(%ids))){
printf(STDERR "Warning: some ID values were not found:\n");
for my $id (keys(%ids)){
if(!$filteredIDs{$id}){
print(STDERR " $id");
}
}
print(STDERR "\n");
}
printf("## <Individual/Column IDs: %s > ##\n", join(" ",@idOrder));
next;
}
my ($marker, @gts) = split(/\s+/);
printf("%-15s %s\n", $marker, join(" ", @gts[@colOrder]));
}