forked from gringer/bioinfscripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
GenABEL2GIANT.pl
executable file
·112 lines (104 loc) · 2.76 KB
/
GenABEL2GIANT.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/perl
use warnings;
use strict;
use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
use IO::Compress::Gzip qw(gzip $GzipError);
my ($snpName, $mafName, @giantNames) = @ARGV;
my %chipSNPs = ();
my %MAFs = ();
print(STDERR "Reading SNP file...");
my $snpFile = new IO::Uncompress::Gunzip("$snpName") or
die("Unable to open $snpName\n");
my $read = 0;
while(<$snpFile>){
chomp;
$chipSNPs{$_} = 1;
if($read++ > 10000){
print(STDERR ".");
$read = 0;
}
}
close($snpFile);
print(STDERR " done.\n");
print(STDERR "Reading MAF file...");
my $mafFile = new IO::Uncompress::Gunzip("$mafName") or
die("Unable to open $mafName\n");
$read = 0;
while(<$mafFile>){
chomp;
if(/^Marker/){
next;
}
my ($marker, $sex, $base, $MAF) = split(/,/);
$MAFs{$sex}{$marker}="$base,$MAF";
if($read++ > 300000){
print(STDERR ".");
$read = 0;
}
}
close($mafFile);
print(STDERR " done.\n");
foreach my $giantName (@giantNames){
my $sexCode = "";
if ($giantName =~ /NI_([fmc]).*?_IMPUTE2/) {
$sexCode = ($1 eq "c") ? "a" : $1;
}
my $giantFile = new IO::Uncompress::Gunzip("$giantName") or
die("Unable to open $giantName\n");
print(STDERR "Reading GIANT file [$giantName]...");
$read = 0;
my $of = new IO::Compress::Gzip "GIANT_${giantName}" or
die "Unable to open GIANT_${giantName} for writing\n";
print($of "chrom,markername,strand,n,effect_allele,".
"other_allele,eaf,imputation_type,chi2.1df,beta,".
"se,p\n");
while (<$giantFile>) {
chomp;
tr/\"//d;
if (substr($_,0,1) eq ",") {
next;
}
my ($markername, $chrom, $pos, $strand, $A1, $A2, $n, $beta, $se,
$chi2_1df, $P1df, $p, $effAB, $effBB, $chi2_2df, $P2df,
$effect_allele) = split(/,/);
my ($maa,$maf) = split(/,/, $MAFs{$sexCode}{$markername});
if (($maf == 1) || ($maf == 0)) {
next; # skip homozygous / missing SNPs
}
my $eaf = $maf;
my $other_allele = $A1;
if ($maa ne $effect_allele) {
$eaf = 1 - $maf;
}
$eaf = sprintf("%0.4f", $eaf);
if ($other_allele eq $effect_allele) {
$other_allele = $A2;
}
my $imputation_type = 4;
if ($chipSNPs{$markername}) {
$imputation_type = 0;
}
if($chi2_1df ne "NA"){
$chi2_1df = sprintf("%0.4f", $chi2_1df);
}
if($beta ne "NA"){
$beta = sprintf("%0.4f", $beta);
}
if($se ne "NA"){
$se = sprintf("%0.6f", $se);
}
if($p ne "NA"){
$p = sprintf("%0.4f", $p);
}
print($of "$chrom,$markername,$strand,$n,$effect_allele,".
"$other_allele,$eaf,$imputation_type,$chi2_1df,$beta,".
"$se,$p\n");
if($read++ > 400000){
print(STDERR ".");
$read = 0;
}
}
close($of);
close($giantFile);
print(STDERR " done.\n");
}