diff --git a/rp_bin/danerdir_6 b/rp_bin/danerdir_6 index 97aa2cb..9a6f522 100755 --- a/rp_bin/danerdir_6 +++ b/rp_bin/danerdir_6 @@ -68,7 +68,7 @@ version: $version --ploc STRING location (absolute path) of plink, if other than in the path --help print this message and exit --outdir STRING outdir - --indir STRING indir, where the data is + --indir STRING indir, where the data is --mds STRING mds-file --idex STRING idex-file --idin STRING idin-file @@ -107,12 +107,12 @@ version: $version batch_identifier: files matching this should all have no double id-entries - expects a running plink and beagle version somewhere in the path, + expects a running plink and beagle version somewhere in the path, otherwise specify a location with --ploc or --beoc (e.g. /home/user/plink/plink) beagle at least in version 3.1 created by Stephan Ripke 2008 at MGH, Boston, MA - + "; my $range_file = "NORANGEFILE"; @@ -120,7 +120,7 @@ my $sout = ""; #### evaluate options use Getopt::Long; -GetOptions( +GetOptions( "help"=> \my $help, "ploc=s"=> \$p2loc, "outdir=s"=> \$outdir, @@ -314,16 +314,16 @@ if ($bfile) { } -else { +else { unless ($dosf_file) { # &mysystem("cp $indir/*$bind.* .") unless (-e $famfile); - + &mysystem("cp $indir/$dosfile .") unless (-e $dosfile); &mysystem("cp $indir/$mapfile .") unless (-e $mapfile); &mysystem("cp $indir/$famfile .") unless (-e $famfile); &mysystem("cp $indir/$ngtfile .") unless (-e $ngtfile); - - + + } else { &mysystem("cp $rootdir/$dosf_file .") unless (-e $dosf_file); @@ -379,18 +379,18 @@ my $score_w_file = "score.w.file"; my $counts = 0; if ($danscore_file) { - unless (-e $danscore_nogz) { + unless (-e $danscore_nogz) { &mysystem("cp $rootdir/$danscore_file .") ; if ($danscore_file =~ /gz$/) { - &mysystem("zcat $danscore_file > $danscore_nogz"); + &mysystem("zcat $danscore_file > $danscore_nogz"); } } -# &mysystem("cp $rootdir/$danscore_file .") unless (-e $danscore_file); +# &mysystem("cp $rootdir/$danscore_file .") unless (-e $danscore_file); &mysystem("cp $rootdir/$range_file .") unless (-e $range_file); - + #### write score and weight file. - + unless (-e $score_p_file) { my $beta = 0; print "read map\n" if ($debug); @@ -470,13 +470,13 @@ if ($danscore_file) { print PFILE $snpname; print PFILE "\t".$pv; print PFILE "\n"; - + print WFILE $snpname; print WFILE "\t".$a1; print WFILE "\t".log($or); print WFILE "\n"; } - + } close IFILE; @@ -486,7 +486,7 @@ if ($danscore_file) { if ($counts ==0) { print "no SNPs for polyscore left for this chunk\n" if ($debug); &mysystem("echo chunk empty > $outdir/dan_$workname.profile.log"); - + exit; } @@ -524,40 +524,40 @@ if ($mds_name){ if ($addcov_name ){ - + die "$addcov_name:".$! unless open IFILE, "< $addcov_name"; my $head = ; - + my @cells = &split_line($head); my $id = $cells[0]."\t".$cells[1] ; - + foreach (2..$#cells) { $coco_str .= ",".$cells[$_]; $addcov{$id} .= "\t".$cells[$_]; } - + while (my $line = ){ my @cells = &split_line($line); my $id = $cells[0]."\t".$cells[1] ; foreach (2..$#cells) { $addcov{$id} .= "\t".$cells[$_]; } - + # print $id.$addcov{$id}."\n"; } close IFILE; - - - + + + die "$mds_name:".$! unless open IFILE, "< $mds_name"; die "$mds_name.addcov:".$! unless open OFILE, "> $mds_name.addcov"; die "$mds_name.addcov.ex:".$! unless open EFILE, "> $mds_name.addcov.ex"; - + while (my $line = ){ chomp($line); my @cells = &split_line($line); my $id = $cells[0]."\t".$cells[1] ; - + if (exists $addcov{$id}) { print OFILE $line.$addcov{$id}."\n"; } @@ -568,7 +568,7 @@ if ($mds_name){ close IFILE; close OFILE; close EFILE; - + } } @@ -582,7 +582,7 @@ if ($mds_name){ if ($mds_name){ - + foreach (@mcols) { $coco_str .= ",C$_"; @@ -631,18 +631,18 @@ else { ########################################################### ### work here #################################################### - + die "$addcov_name:".$! unless open IFILE, "< $addcov_name"; my $head = ; - + my @cells = &split_line($head); my $id = $cells[0]."\t".$cells[1] ; - + foreach (2..$#cells) { $coco_str .= ",".$cells[$_]; } - - + + $coco_str =~ s/^,//; $cov_str = "--covar $addcov_name --covar-name $coco_str"; @@ -684,7 +684,7 @@ my %females_hash = (); my $keep_file = "keep_file.sex.idin"; if ($males){ - + die "$famfile:".$! unless open IFILE, "< $famfile"; die "$idin_name:".$! unless open OFILE, ">> $keep_file"; while (my $line = ){ @@ -700,7 +700,7 @@ if ($males){ if ($females){ - + die "$famfile:".$! unless open IFILE, "< $famfile"; die "$idin_name:".$! unless open OFILE, ">> $keep_file"; while (my $line = ){ @@ -719,7 +719,7 @@ if ($females){ if ($idin_name){ - + die "$idin_name:".$! unless open IFILE, "< $idin_name"; die "$idin_name:".$! unless open OFILE, ">> $keep_file"; while (my $line = ){ @@ -736,7 +736,7 @@ if ($idin_name){ if ($idex_name){ - + die "$idex_name:".$! unless open IFILE, "< $idex_name"; while (my $line = ){ my @cells = &split_line($line); @@ -768,12 +768,12 @@ if ($mds_name){ $mdsin{"$cells[0]\t$cells[1]"} = 1; my $covstr = ""; foreach my $cc (@mcols) { - print "$cc\n" if ($debug); + #print "$cc\n" if ($debug); $covstr .= " ".$cells[$cc+2]; } $mdsvals{"$cells[0]\t$cells[1]"} = $covstr; print "$covstr\n" if ($debug); - sleep (4) if ($debug); + #sleep (4) if ($debug); } close FILE; } @@ -787,24 +787,24 @@ if ($mds_name){ my %ngt=(); unless ($danscore_file) { unless (-e $ngtfile) { - + die "$workname.assoc.dosage.ngt:".$! unless open OFILE, "> dan_$workname.assoc.dosage.ngt"; print OFILE " CHR SNP BP A1 A2 FRQ_A_000 FRQ_U_000 INFO OR SE P ngt\n"; close OFILE; - + &mysystem("gzip -f dan_$workname.assoc.dosage.ngt"); &mysystem("cp dan_$workname.assoc.dosage.ngt.gz $outdir"); - + # if ($danscore_file) { # &mysystem("touch $outdir/dan_$workname.profile.log"); # } - + chdir ($outdir); &mysystem("rm -fr $workdir"); print "no ngt file\n"; exit; - + } @@ -850,7 +850,7 @@ while (my $line = ){ print EFILE "$cells[0]\t$cells[1]\n"; - + delete ($alphe{"$cells[0]\t$cells[1]"}); next; } @@ -858,10 +858,10 @@ while (my $line = ){ # next unless (exists $mdsin{"$cells[0]\t$cells[1]"}); } - my $del = 0; - + my $del = 0; + if ($pheno){ - + my $c0 = $cells[0]; # print "hier: ".$c0."\t".$cells[1]."\t".$alphe{$c0."\t".$cells[1]}."\n"; if ($star == 0) { @@ -869,7 +869,7 @@ while (my $line = ){ } if (exists $alphe{$c0."\t".$cells[1]}){ # print $c0."\t".$cells[1]."\n"; - $cells [5] = $alphe{$c0."\t".$cells[1]} + $cells [5] = $alphe{$c0."\t".$cells[1]} } else { $cells [5] = -9; @@ -881,15 +881,15 @@ while (my $line = ){ # } } - # print "new: $cells[0]"; - # print " $cells[1]"; - # print " $cells[2]"; - # print " $cells[3]"; - # print " $cells[4]"; - # print " $cells[5]\n"; + # print "new: $cells[0]"; + # print " $cells[1]"; + # print " $cells[2]"; + # print " $cells[3]"; + # print " $cells[4]"; + # print " $cells[5]\n"; # sleep(1); - + print OFILE "$cells[0]"; print OFILE "\t$cells[1]"; print OFILE "\t$cells[2]"; @@ -897,8 +897,8 @@ while (my $line = ){ print OFILE "\t$cells[4]"; print OFILE "\t$cells[5]"; print OFILE "\n"; - - + + next if (exists $idex{$cells[0]."\t".$cells[1]}); if ($idin_name) { next unless (exists $idin{$cells[0]."\t".$cells[1]}); @@ -915,7 +915,7 @@ while (my $line = ){ # print "phenotype: $cells[5]\n"; $ncaco{$cells[5]} ++; $Nall++ if ($del == 0); - + } close IFILE; close OFILE; @@ -937,7 +937,7 @@ if ($ncaco{1} + $ncaco{2} < 5) { #print "debug sleep\n"; #sleep(10); - + #die "$mds_name.ex: ".$! unless open FILE, "> $mds_name.ex"; #foreach (keys %mdsin){ @@ -963,21 +963,21 @@ if ($ncaco{1} + $ncaco{2} < 5) { # exit; - - + + ########################################### ### here actual logistic regression ############################################ my $idex_str = ""; if ($idex_name) { $idex_str = "--remove $idex_name"; -} +} my $idin_str = ""; if ($idin_name || $males || $females) { $idin_str = "--keep $keep_file"; -} +} @@ -1040,14 +1040,14 @@ if ($danscore_file) { my @loggrep_arr = &split_line($loggrep); $wc_soll = $loggrep_arr[0]; } - + $wc_soll = $wc_soll * 1; if ($wc_soll < 1) { print "Error: less than 1 ID left\n"; exit; } - + foreach(@profilefiles) { my $wc_str = `wc -l $_`; my @wc_arr = &split_line($wc_str); @@ -1076,16 +1076,16 @@ if ($danscore_file) { if (-e $inname_loc) { die "$inname_loc:".$! unless open IFILE, "< $inname_loc"; die "$outname_loc:".$! unless open PFILE, "> $outname_loc"; - + my $line = ; - + print PFILE "FID\tIID\tCOUNT\tPHENO\tSCORE"; print PFILE "\t".$mdsvals{"FID\tIID"}; print PFILE "\n"; - + while ($line = ){ my @cells = &split_line($line); - + print PFILE "$cells[0]"; print PFILE "\t$cells[1]"; print PFILE "\t$cells[3]"; @@ -1093,10 +1093,10 @@ if ($danscore_file) { print PFILE "\t$cells[5]"; print PFILE "\t".$mdsvals{"$cells[0]\t$cells[1]"}; print PFILE "\n"; - + $count_bin[$nn] += $cells[3]; $n_bin[$nn]++; - + } close IFILE; close PFILE; @@ -1116,13 +1116,13 @@ if ($danscore_file) { $trio_txt = "--trio"; } my $sys = "danscore_result_3 --out $workname --tarball $workname.tar.gz $trio_txt"; - &mysystem($sys); + &mysystem($sys); - ## add first column to output file + ## add first column to output file die "$workname.poly.out.txt:".$! unless open IFILE, "< $workname.poly.out.txt"; die "$workname.poly.out.txt.combined:".$! unless open PFILE, "> $workname.poly.out.txt.combined"; - + my $line = ; print PFILE "NBIN\t$line"; my $nn=1; @@ -1139,22 +1139,22 @@ if ($danscore_file) { close PFILE; - + &mysystem("cp $workname.tar.gz $outdir/"); &mysystem("cp $workname.pdf $outdir/"); &mysystem("cp $workname.poly.out.txt.combined $outdir/"); &mysystem("cp R.$workname.hl_nw.Rin $outdir/"); - + } - - else { + + else { &mysystem("cp $workname.S* $outdir/"); &mysystem("mv $workname.log $outdir/dan_$workname.profile.log"); } - + &mysystem("mv $famfile.pt.ex $outdir") unless (-e "$outdir/$famfile.pt.ex"); - + chdir ($outdir); @@ -1168,13 +1168,13 @@ if ($danscore_file) { #exit; - + ########################################### ### here permutation ############################################ -foreach my $iperm (1..$permut) { +foreach my $iperm (1..$permut) { unless (-e "$outdir/perm_$workname.perm$iperm"){ @@ -1190,16 +1190,16 @@ foreach my $iperm (1..$permut) { print PFILE "\t$cells[4]"; print PFILE "\t$cells[5]"; print PFILE "\n"; - + } close IFILE; close PFILE; - + my $sys = "$p2loc/plink --silent --memory 2000 --dosage $dosfile format=2 --fam $famfile.pt.perm --allow-no-sex --out $workname.perm --map $mapfile $cov_str"; print "$sys\n" if ($debug); - + &mysystem($sys); - + die "$workname.perm.assoc.dosage:".$! unless open IFILE, "< $workname.perm.assoc.dosage"; die "perm_$workname.perm$iperm:".$! unless open OFILE, "> perm_$workname.perm$iperm"; my $head = ; @@ -1210,34 +1210,34 @@ foreach my $iperm (1..$permut) { } close IFILE; close OFILE; - + &mysystem("cp perm_$workname.perm$iperm $outdir") ; &mysystem("rm $workname.perm.assoc.dosage") ; - - + + unless (-e "$outdir/permfam.perm$iperm") { - + die "$famfile.pt.perm:".$! unless open IFILE, "< $famfile.pt.perm"; die "permfam.perm$iperm:".$! unless open PFILE, "> permfam.perm$iperm"; while (my $line = ){ my @cells = &split_line($line); - + print PFILE "$cells[0]"; print PFILE "\t$cells[1]"; print PFILE "\t$cells[5]"; print PFILE "\n"; - + } close IFILE; close PFILE; - + &mysystem("cp permfam.perm$iperm $outdir") ; - + } } - + # exit; - + } @@ -1251,8 +1251,8 @@ foreach my $iperm (1..$permut) { # add NGT information ################################################# - -unless ($danscore_file) { + +unless ($danscore_file) { die "$workname.assoc.dosage:".$! unless open IFILE, "< $workname.assoc.dosage"; die "$workname.assoc.dosage.ngt:".$! unless open OFILE, "> dan_$workname.assoc.dosage.ngt"; my $head = ; @@ -1281,7 +1281,7 @@ unless ($danscore_file) { &mysystem("mv $famfile.pt.ex $outdir") unless (-e "$outdir/$famfile.pt.ex"); &mysystem("mv $workname.log $outdir/dan_$workname.assoc.log"); - + chdir ($outdir); &mysystem("mv dan_$workname.assoc.dosage.ngt.gz.tmp dan_$workname.assoc.dosage.ngt.gz"); &mysystem("rm $outdir/dan_$workname.assoc.dosage.plink_started"); diff --git a/rp_bin/danscore_multiplot_2 b/rp_bin/danscore_multiplot_2 index fcc3304..f52c427 100755 --- a/rp_bin/danscore_multiplot_2 +++ b/rp_bin/danscore_multiplot_2 @@ -6,7 +6,7 @@ use strict; # read config file ############################# -my $conf_file = $ENV{HOME}."/ricopili.conf"; +my $conf_file = $ENV{RPHOME}."/ricopili.conf"; my %conf = (); die $!."($conf_file)" unless open FILE, "< $conf_file"; @@ -72,7 +72,7 @@ version: $version #### evaluate options use Getopt::Long; -GetOptions( +GetOptions( "help" => \my $help, "minusone" => \my $minusone, "out=s" => \my $out, @@ -199,7 +199,7 @@ foreach (@p_th) { ##### go through all files my $fico = 0; ## this is just a counter -## here the columns +## here the columns my $or_col = 11; my $coeff_col = 16; my $orl_col = 12; @@ -462,7 +462,7 @@ mtext("R - squared", side=2, line=-1, cex.lab=1,las=2, padj =-18) coldiff = coldiff * 1.5; } -# if (XAXIS) { +# if (XAXIS) { # text(x = colMeans(rip)-coldiff, y= 0.00, offset = 1.5, pos = 1, xpd =T , line = 1, labels = colnames(plotmatrix_r2), cex= cex.loc,srt=45) ### this here is new # } @@ -477,7 +477,7 @@ mtext("R - squared", side=2, line=-1, cex.lab=1,las=2, padj =-18) # ripx_nocov = rip [11:22] my.cex = .7 if (FICO * NPTH > 100) { - my.cex =.3 + my.cex =.3 } print (FICO) @@ -508,7 +508,7 @@ print (FICO) # print (ripx) #if (PUBL) { - text(x=ripx,y=poly_r2[,cc],sig,srt=75,cex=my.cex,adj =c(-.2,0)) + text(x=ripx,y=poly_r2[,cc],sig,srt=75,cex=my.cex,adj =c(-.2,0)) #} @@ -544,7 +544,7 @@ if (PUBL) { ####################################################################### ## here the plot for one p-value threshold -xl = as.numeric(poly_nca["PTHRESH",]); +xl = as.numeric(poly_nca["PTHRESH",]); yl = as.numeric(poly_r2_b["PTHRESH",]); ylh1 = as.numeric(poly_h2l1["PTHRESH",]); ylh2 = as.numeric(poly_h2l2["PTHRESH",]); @@ -553,33 +553,33 @@ print (yl); print (poly_nca); print (poly_r2_b); ### r2 -plot(xl,yl,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. R2 on p = PTHRESH"); +plot(xl,yl,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. R2 on p = PTHRESH"); text(xl,yl,labels= colnames(poly_r2_b),cex=0.4,pos=3) ### 2nd value -xl = as.numeric(poly_nca["P2THRESH",]); +xl = as.numeric(poly_nca["P2THRESH",]); yl = as.numeric(poly_r2_b["P2THRESH",]); ylh1 = as.numeric(poly_h2l1["P2THRESH",]); ylh2 = as.numeric(poly_h2l2["P2THRESH",]); -plot(xl,yl,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. R2 on p = P2THRESH"); +plot(xl,yl,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. R2 on p = P2THRESH"); text(xl,yl,labels= colnames(poly_r2_b),cex=0.4,pos=3) ### 3nd value -xl = as.numeric(poly_nca["P3THRESH",]); +xl = as.numeric(poly_nca["P3THRESH",]); yl = as.numeric(poly_r2_b["P3THRESH",]); ylh1 = as.numeric(poly_h2l1["P3THRESH",]); ylh2 = as.numeric(poly_h2l2["P3THRESH",]); -plot(xl,yl,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. R2 on p = P3THRESH"); +plot(xl,yl,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. R2 on p = P3THRESH"); text(xl,yl,labels= colnames(poly_r2_b),cex=0.4,pos=3) ### 4th value -xl = as.numeric(poly_nca["P4THRESH",]); +xl = as.numeric(poly_nca["P4THRESH",]); yl = as.numeric(poly_r2_b["P4THRESH",]); ylh1 = as.numeric(poly_h2l1["P4THRESH",]); ylh2 = as.numeric(poly_h2l2["P4THRESH",]); -plot(xl,yl,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. R2 on p = P4THRESH"); +plot(xl,yl,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. R2 on p = P4THRESH"); text(xl,yl,labels= colnames(poly_r2_b),cex=0.4,pos=3) @@ -589,15 +589,15 @@ cih = ylh1 + 2* ylh2 cil = ylh1 - 2* ylh2 maxyl = max(cih) minyl = min(cil) -#plot(xl,ylh1,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. H2L on p = PTHRESH"); +#plot(xl,ylh1,cex=.6,col="red",xlab="Ncase",ylab="R2", main = "Ncase vs. H2L on p = PTHRESH"); #text(xl,ylh1,labels= colnames(poly_r2_b),cex=0.4,pos=3) ll = length(ylh1) -plot(1:ll,type = "b",ylh1,cex=.6,col="red",xlab="index",ylab="H2L", main = "H2L on p = PTHRESH",ylim =c(minyl,maxyl)); -points(1:ll,type = "b",cih,cex=.6,col="black"); -points(1:ll,type = "b",cil,cex=.6,col="black"); +plot(1:ll,type = "b",ylh1,cex=.6,col="red",xlab="index",ylab="H2L", main = "H2L on p = PTHRESH",ylim =c(minyl,maxyl)); +points(1:ll,type = "b",cih,cex=.6,col="black"); +points(1:ll,type = "b",cil,cex=.6,col="black"); @@ -614,7 +614,7 @@ points(1:ll,type = "b",cil,cex=.6,col="black"); for (ii in 2:FICO) { outname = colnames(plotmatrix)[ii-1] print (outname) - pdf (paste(outname,"_OUTNAME",sep=""),8.7,6) + pdf (paste(outname,"_OUTNAME",sep=""),8.7,6) pm_loc = plotmatrix [,ii-1] rip_loc <- barplot(pm_loc,beside=T,legend=F,col=mycol, main = "polygene R2 + P", ylim=c(minp,maxp*1.2),cex.names = .5, axisnames= F,las = 1) dev.off(); @@ -672,8 +672,8 @@ else { } $R_templ =~ s/OUTNAME/$out.pdf/g; #$R_templ =~ s/XLAB/$xlab[$ccol]/g; - - + + &a2file ("$out.R_poly.in", $R_templ); &mysystem("$r_sys < $out.R_poly.in --vanilla "); #&mysystem("source /broad/software/scripts/useuse; use R-2.14; R< R_poly.in --vanilla "); diff --git a/rp_bin/filtnorm_filter b/rp_bin/filtnorm_filter index aa0ce48..707c1ac 100755 --- a/rp_bin/filtnorm_filter +++ b/rp_bin/filtnorm_filter @@ -24,7 +24,7 @@ use Compress::Zlib ; ##### help message my $usage = " -Usage : $progname +Usage : $progname version: $version @@ -43,7 +43,7 @@ version: $version use Getopt::Long; -GetOptions( +GetOptions( "help"=> \my $help, "filtnorm=s"=> \my $filtnorm_file, @@ -120,11 +120,11 @@ my $filtnorm_out_tmp = $filtnorm_file.".reform.tmp.gz"; unless (-e $filtnorm_info) { my $igz = gzopen("$filtnorm_file", "rb") or die "Cannot open file $filtnorm_file: $gzerrno\n" ; my $ogz = gzopen("$filtnorm_info_tmp", "wb") or die "Cannot open file $filtnorm_info_tmp: $gzerrno\n" ; - - + + print "reading info out of $filtnorm_file into $filtnorm_info\n" if ($debug); my $icc = 0; - + $ogz->gzwrite("CHROM POS ID REF ALT QUAL FILTER INFO FORMAT\n"); while ($igz->gzreadline(my $line)){ chomp($line); @@ -135,19 +135,19 @@ unless (-e $filtnorm_info) { # } # if ($cells[0] eq "Y") { # $cells[0] = "24"; -# } +# } my $out_str = $cells[0]; foreach my $cc (1..8) { $out_str .= " ".$cells[$cc]; } $ogz->gzwrite("$out_str\n"); } - + $icc++; - print "$icc lines read\n" if ($icc % 10000 == 0) if ($debug); + print "$icc lines read\n" if ($debug); # last if ($icc ==10000); } - + $igz->gzclose(); $ogz->gzclose(); &mysystem ("mv $filtnorm_info_tmp $filtnorm_info"); @@ -195,17 +195,17 @@ while ($igz->gzreadline(my $line)){ } if (length($cells[$a1_col]) > 1) { $indel = 1; - } - + } + if ($indel) { - + if (exists $pos_hash_arr{$pos_loc}) { $multipos{$pos_loc}++; } push(@{$pos_hash_arr{$pos_loc}}, $row_cc); } - + my @icells = split ";",$cells[$info_col]; my $ac_loc = $icells[0]; $ac_loc =~ s/AC=//; @@ -247,7 +247,7 @@ foreach my $pos_loc (keys %multipos) { $ac_max = $ac_loc if ($ac_loc > $ac_max); } $ac_max_hash{$pos_loc} = $ac_max - + } #exit; @@ -266,13 +266,13 @@ foreach my $pos_loc (keys %multipos) { my @icells = split " ",$row; my $ac_loc = $icells[4]; -# print $row_arr[$row_cc]; +# print $row_arr[$row_cc]; if ($ac_loc eq $ac_max_hash{$pos_loc} && $max_defined == 0) { # print " xxxx"; $max_defined = 1; } else { - $out_hash{$row_cc} = 1; + $out_hash{$row_cc} = 1; # print " ----"; } # print " $row_cc\n"; @@ -292,18 +292,18 @@ unless (-e $filtnorm_out) { my $igz = gzopen("$filtnorm_file", "rb") or die "Cannot open file $filtnorm_file: $gzerrno\n" ; my $ogz = gzopen("$filtnorm_out_tmp", "wb") or die "Cannot open file $filtnorm_out_tmp: $gzerrno\n" ; my $oxgz = gzopen("$filtnorm_ex", "wb") or die "Cannot open file $filtnorm_ex: $gzerrno\n" ; - - + + print "filter $filtnorm_file into $filtnorm_out\n"; my $icc = 0; - + # $ogz->gzwrite("CHROM POS ID REF ALT QUAL FILTER INFO FORMAT\n"); while ($igz->gzreadline(my $line)){ chomp($line); - + unless ($line =~ /^#/) { unless (exists $out_hash{$icc}) { @@ -318,15 +318,15 @@ unless (-e $filtnorm_out) { # } # if ($cells[0] eq "Y") { # $cells[0] = "24"; -# } - +# } + my $out_str = $cells[0]; foreach my $cc (1..8) { $out_str .= " ".$cells[$cc]; } $oxgz->gzwrite("$out_str\n"); } - + $icc++; } else { @@ -334,11 +334,11 @@ unless (-e $filtnorm_out) { } - + # print "$icc lines read\n" if ($icc % 10000 == 0); # last if ($icc ==10000); } - + $igz->gzclose(); $ogz->gzclose(); $oxgz->gzclose(); diff --git a/rp_bin/my.wget b/rp_bin/my.wget index 1a40a12..6c32bd0 100755 --- a/rp_bin/my.wget +++ b/rp_bin/my.wget @@ -15,13 +15,10 @@ if (@ARGV != 2) { # only header from first file (and check success) -my $sc =system ("wget -n $ftp_name/$vcf_name"); +my $sc =system ("wget -nv $ftp_name/$vcf_name"); if ($sc != 0){ print "systemcode: $sc\n"; exit; } -$sc =system ("mv $vcf_name $vcf_loc"); - - - +$sc =system ("mv $vcf_name $vcf_loc"); \ No newline at end of file diff --git a/rp_bin/postimp_navi b/rp_bin/postimp_navi index fddbad4..4c07893 100755 --- a/rp_bin/postimp_navi +++ b/rp_bin/postimp_navi @@ -1,11 +1,11 @@ #!/usr/bin/env perl use strict; - + ######################################## # -# version 24: -# +# version 24: +# # chunk results and then meta-chunk # area_plot_speed # new manhattan plot @@ -145,9 +145,9 @@ my $info_th = "0.1"; #print "hmm\n"; -#exit; +#exit; -## permutation +## permutation ## awk '{print $1,$2,int(2*rand())+1}' ....mds_cov > .....mds_cov.perm1 ## @@ -226,9 +226,9 @@ version: $rp_version --idin STRING ID inclusion list (before imputation, while ripping) --sep_pca create and use a separate PCA for each cohort (will get better control over popstrat) - --sep_idin STRING use a different IDset than the separate + --sep_idin STRING use a different IDset than the separate - --mds STRING mds-file (with studyindicator, (if necessary), + --mds STRING mds-file (with studyindicator, (if necessary), NOMDS if no pcaer wished and analysis without covariates --coco STRING comma separated PCA numbers @@ -255,10 +255,10 @@ version: $rp_version 0 - allelic (default) 1 - dominant for first allele 2 - recessive for first allele - 3 - heterocygote + 3 - heterocygote -# --nomega do a meta and work from there, +# --nomega do a meta and work from there, # must do with real big datasets # --nocon do without conditional # --nocovar analysis without covariates @@ -317,7 +317,7 @@ version: $rp_version --popname STRING population for clumping, right now: eur, eas, sas, amr, afr --regplot STRING additional regplot file - + ### --cond conditional analysis on areator output --males only use males, famfile needs to be right @@ -331,7 +331,7 @@ version: $rp_version --serial no sending jobs to queue all in one run - -> usually only used for testing + -> usually only used for testing --sepa INT use INT number of parallel jobs during serial --sleep INT sleep for INT seconds, try this if you think there is a race condition @@ -355,12 +355,12 @@ version: $rp_version ************ created by Stephan Ripke 2009 at MGH, Boston, MA - + "; use Getopt::Long; -GetOptions( +GetOptions( "s2gr=i"=> \ $s2gr, "chr=i"=> \ $chr, @@ -493,7 +493,7 @@ my $blueprint_script = "blueprint"; ### my.pipeline_tar my $clumpnavi2_script = "clump_nav3"; ### my.pipeline_tar my $shrinkpdf_script = "shrinkpdf"; ### my.pipeline_tar my $comp1mhc_script = "comp1mhc2"; ### my.pipeline_tar -#my $comp1mhc1_script = "comp1mhc"; +#my $comp1mhc1_script = "comp1mhc"; my $comp1mhcreg_script = "comp1mhc_reg"; ### my.pipeline_tar my $pdfjoin_script = "pdfjoin"; ### my.pipeline_tar my $pdfnup_script = "pdfnup"; ### my.pipeline_tar @@ -557,7 +557,7 @@ my @miss_scripts; #my $err_scr = 0; foreach my $scr_name (@test_scripts) { my $scr_path = ''; - + for my $path ( split /:/, $ENV{PATH} ) { if ( -f "$path/$scr_name" && -x _ ) { print "$scr_name\tfound in $path\n" if ($debug); @@ -569,7 +569,7 @@ foreach my $scr_name (@test_scripts) { push @miss_scripts, "cp /home/unix/sripke/bin/$scr_name ./\n"; print "!!Error!! : No $scr_name command available\n" ; } - + } #exit; @@ -602,7 +602,7 @@ my $noti = 1; my $err_scr = 0; { my $scr_path = ''; - + for my $path ( split /:/, $ENV{PATH} ) { if ( -f "$path/$mutt_script" && -x _ ) { print "$mutt_script\tfound in $path\n" if ($debug); @@ -627,10 +627,10 @@ my $err_scr = 0; print "!!Warning!! : No $mutt_script command available, no email notifications\n"; $noti = 0; - + } } - + } die if $err_scr == 1; @@ -915,18 +915,18 @@ sub send_jobarray { $fini_message .= "##### rp_pipeline_postimp finished successfully:\n"; $fini_message .= "##### $sjainfotxt\n"; $fini_message .= "##### have a look at the distributions subdir for output files\n"; - $fini_message .= "##### have a look at the wiki page for more details\n"; + $fini_message .= "##### have a look at the wiki page for more details\n"; $fini_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n"; $fini_message .= "##################################################################\n"; print "$fini_message\n"; - + die $! unless open SUC, "> success_file"; print SUC $fini_message."\n"; close SUC; if ($noti == 1) { - + my $sys_email = 'cat success_file | '.$mutt_script.' -s RP_postimp_finished '.$email; print "$sys_email\n"; &mysystem ($sys_email) ; @@ -947,18 +947,18 @@ sub send_jobarray { $fini_message .= "##### rp_pipeline_postimp paused\n"; $fini_message .= "##### $sjainfotxt\n"; $fini_message .= "##### please read and follow instructions above then restart\n"; - $fini_message .= "##### have a look at the wiki page for more details\n"; + $fini_message .= "##### have a look at the wiki page for more details\n"; $fini_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n"; $fini_message .= "##################################################################\n"; print "$fini_message\n"; - + die $! unless open SUC, "> success_file"; print SUC $fini_message."\n"; close SUC; if ($noti == 1) { - + my $sys_email = 'cat success_file | '.$mutt_script.' -s RP_postimp_paused '.$email; print "$sys_email\n"; &mysystem ($sys_email) ; @@ -971,7 +971,7 @@ sub send_jobarray { exit; } - + @@ -1029,7 +1029,7 @@ sub send_jobarray { $err_message .= "##### postimp pipeline stopped\n"; $err_message .= "##### $sjainfotxt\n"; $err_message .= "##### if reason does not appear obvious\n"; - $err_message .= "##### have a look at the wiki page\n"; + $err_message .= "##### have a look at the wiki page\n"; $err_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n"; $err_message .= "##### or contact the developers\n"; $err_message .= "##### version: $rp_version\n"; @@ -1056,7 +1056,7 @@ sub send_jobarray { } - + ################################# ## starting the job array ################################## @@ -1086,13 +1086,13 @@ sub send_jobarray { @job_sepa_arr = (); } $jc++; - + } if (@job_sepa_arr > 0) { $jc--; push @job_sepa_arr, "wait"; - + my $sepa_file = "$sjaname.sepa.$jc"; &a2filenew ($sepa_file,@job_sepa_arr); print "sepa_file: ".$sepa_file."\n" if ($debug); @@ -1100,9 +1100,9 @@ sub send_jobarray { &mysystem("./$sepa_file"); } - + } - else { + else { my $sys_loc = "$blueprint_script $sja_week_str --noerr --njob $nsja_loc --array $jobfile --wa $sjatime --mem $sjamem --j --na $jobfile $multi_txt"; &mysystem ($sys_loc); } @@ -1110,15 +1110,15 @@ sub send_jobarray { - + $command_line =~ s/--force1//; my $wt_file = "$sjadir/j.$jobfile.id"; chdir "$rootdir" or die "something strange"; - - + + if ($serial) { my $sys_re = "$command_line"; @@ -1365,7 +1365,7 @@ unless (-e "datasets_info") { my $dasudir_loc_qc1 = "$dasudir"."qc1_$bitemp"; $dasudir_loc_qc1 .= ".hg19.ch.fl"; # print "start: $dasudir_loc_qc1 \n"; - + unless (-e $dasudir_loc_qc1) { my $tmpdir = $dasudir_loc_qc1; @@ -1456,7 +1456,7 @@ if ($trioset_file){ $trioset{$cells[0].".ch.fl"} = 1; $trioset{$cells[0].".hg19.ch.fl"} = 1; - + # $cells[0] .= ".hg19.ch.fl"; # $trioset{$cells[0]} = 1; @@ -1509,42 +1509,42 @@ unless ($sep_pca) { unless (-e $mds_name_sub) { die $!." <$mds_name>" unless open MDSI, "< $mds_name"; die $! unless open MDSO, "> $mds_name_sub"; - + my @mds_arr = split ',' ,$mds_cols; #print "@mds_arr\n"; my $line = ; my @cells = &split_line($line); - + foreach (2..$#cells){ push @mds_arr, ($_-2) if ($cells[$_] =~ /^st/); push @mds_arr, ($_-2) if ($cells[$_] =~ /SCORE/); } print MDSO "$cells[0]"; print MDSO "\t$cells[1]"; - - + + foreach (@mds_arr){ my $cn = $_+2; print MDSO "\t$cells[$cn]"; } print MDSO "\n"; - + while (my $line = ){ my @cells = &split_line($line); - + print MDSO "$cells[0]"; print MDSO "\t$cells[1]"; - + foreach (@mds_arr){ my $cn = $_+2; print MDSO "\t$cells[$cn]"; } - + print MDSO "\n"; } - - + + close MDSI; close MDSO; } @@ -1573,37 +1573,37 @@ unless ($sep_pca) { unless (-e $mds_name_sub_mds) { die $!." <$mds_name>" unless open MDSI, "< $mds_name"; die $! unless open MDSO, "> $mds_name_sub_mds"; - + my @mds_arr = split ',' ,$mds_cols; #print "@mds_arr\n"; my $line = ; my @cells = &split_line($line); - + print MDSO "$cells[0]"; print MDSO "\t$cells[1]"; - + foreach (@mds_arr){ die "wrong mds-number" if ($_ < 1); my $cn = $_+2; print MDSO "\t$cells[$cn]"; } print MDSO "\n"; - + while (my $line = ){ my @cells = &split_line($line); - + print MDSO "$cells[0]"; print MDSO "\t$cells[1]"; - + foreach (@mds_arr){ my $cn = $_+2; print MDSO "\t$cells[$cn]"; } - + print MDSO "\n"; } - - + + close MDSI; close MDSO; } @@ -1693,7 +1693,7 @@ if ($danscore_file ne "") { {verbose => 0, mode => 0750}, ); } - + my @created = mkpath( ## $created ? $distdir, {verbose => 0, mode => 0750}, @@ -1770,7 +1770,7 @@ if ($polyto_file ne "") { unless ($sep_pca) { unless ($result_name){ - + if ($mds_name ne "NOMDS") { die $!." <$mds_name>" unless open MDSI, "< $mds_name"; while (my $line = ){ @@ -1807,9 +1807,9 @@ unless ($result_name){ my $famloc = $famfile_loc; $famloc =~ s/hg19.ch.fl.fam$/fam/; $famloc =~ s/ch.fl.fam$/fam/; - - + + die $!." <$famloc>" unless open IN, "< $famloc"; while (my $line = ){ my @cells = &split_line($line); @@ -1824,7 +1824,7 @@ unless ($result_name){ print "this fam-file without any IDs in analysis (--mds): ".$bf."\t".$famloc."\n" ; } } - + @bimfli_files = @bimfli_files_tmp; @bimfli_files_tmp = (); } @@ -1889,7 +1889,7 @@ unless ($result_name){ print "this fam-file without any IDs in analysis (--pheno): ".$bf."\n"; } } - + @bimfli_files = @bimfli_files_tmp; @bimfli_files_tmp = (); } @@ -1955,7 +1955,7 @@ unless ($result_name){ print "this fam-file without any IDs in analysis (--idin): ".$bf."\n"; } } - + @bimfli_files = @bimfli_files_tmp; @bimfli_files_tmp = (); } @@ -2005,9 +2005,9 @@ my $n_cdg = keys %cdg_dis; #my @daner_dicoll; if ($n_cdg > 1) { foreach my $cx (keys %cdg_dis) { - print "dis: $cx\n" if ($debug); + print "dis: $cx\n" if ($debug); my $dadimetadir = $dametadir."_$cx"; - + my @created = mkpath( ## $created ? $dadimetadir, {verbose => 0, mode => 0750}, @@ -2046,7 +2046,7 @@ unless ($sep_pca) { &mysystem ("cp $rootdir/$mds_name ."); $co_str = "--mds $mds_name --coco $mds_cols"; } - + if ($pheno_file ne "NOPHENO") { &mysystem ("cp $rootdir/$pheno_file .") ; $pheno_str = "--pheno $pheno_file"; @@ -2092,15 +2092,15 @@ unless ($sep_pca) { } - + my @id_arr = (); my @pt_arr = (); - + die $!." <$mds_name>" unless open MDSI, "< $mds_name"; my $header = ; while (my $line = ){ my @cells = &split_line($line); - + push @id_arr, "$cells[0]\t$cells[1]"; my $pt_loc = -9; if ($cells[0] =~ /^cas_/) { @@ -2110,12 +2110,12 @@ unless ($sep_pca) { $pt_loc = 1; } push @pt_arr, $pt_loc; - + } - close MDSI; + close MDSI; srand ($siperm); &fisher_yates_shuffle (\@pt_arr); - + die $!." <$pheno_file>" unless open PO, "> $pheno_file"; foreach my $cloc (0..$#id_arr){ print PO $id_arr[$cloc]."\t".$pt_arr[$cloc]."\n"; @@ -2125,8 +2125,8 @@ unless ($sep_pca) { $pheno_str = "--pheno $pheno_file"; } - - + + if ($idex_name) { &mysystem ("cp $rootdir/$idex_name .") ; $idex_str = "--idex $idex_name"; @@ -2147,10 +2147,10 @@ unless ($sep_pca) { print "Error: $addcov_name does not contain header FID IID PT....\n"; exit; } - + $addcov_str = "--addcov $addcov_name"; } - + } } @@ -2207,22 +2207,22 @@ if ($sep_idin_str) { if ($sep_pca) { - + my $mdscov_file = "$outname.covfiles"; unless (-e $mdscov_file) { my $pcaer_count = 0; - + my @sep_pca_str_single; my @mdsfile_txt; my $sep_pca_str_comb = "pcaer --preferfam --prefercase --out $outname"; - - + + unless (-e "$rootdir/daner_$outname.done") { unless ($result_name){ - + #### stuff that is done on full datasets foreach (@bimfli_files) { - + my $bfile = $_; my $bfile_bgs = "$bfile.bgs"; @@ -2257,7 +2257,7 @@ if ($sep_pca) { } my $pca_str = "1,2,3,4"; - + die $!." <$bfile.bgs.menv.mds.overworked.p_assoc.txt>" unless open IN, "< $bfile.bgs.menv.mds.overworked.p_assoc.txt"; my $npca = 0; while (my $line = ){ @@ -2271,10 +2271,10 @@ if ($sep_pca) { push @mdsfile_txt, "$bfile $bfile.bgs.menv.trans.mds $pca_str"; } - $sep_pca_str_comb .= " $bfile.bgs"; + $sep_pca_str_comb .= " $bfile.bgs"; } - + } unless (-e "$seppcadir/$sep_idin") { @@ -2291,7 +2291,7 @@ if ($sep_pca) { print "debug2\n" if ($debug); } } - } + } if ($pcaer_count > 0) { my $start_file = "start_pcaer.sh"; @@ -2305,11 +2305,11 @@ if ($sep_pca) { close FILE1; &mysystem ("chmod u+x $seppcadir/$start_file"); - - + + print "\n###########################################################################\n" if ($debug); print "separate PCAs requested, please start these commands from the command line:\n\n" if ($debug); - + print "\tcd $seppcadir\n" if ($debug); print "\t./$start_file\n\n" if ($debug); print "wait till all pcaer-jobs (_pc_*) ran through and restart postimp-pipeline\n" if ($debug); @@ -2318,14 +2318,14 @@ if ($sep_pca) { ############################################################# ## SUCCESSSS ############################################################# - + $sjadir = $rootdir; $sjaname = "paused"; push @sjaarray, "tmp"; $sjatime = 2; $sjamem = 1000; - - + + &send_jobarray; } @@ -2340,13 +2340,13 @@ if ($sep_pca) { print FILE1 "$_\n"; } close FILE1; - + } } else { print "$mdscov_file exists, now going to next step\n" if ($debug); } - + die $!." <$mdscov_file>" unless open IN, "< $mdscov_file"; while (my $line = ){ my @cells = &split_line($line); @@ -2369,8 +2369,8 @@ if ($sep_pca) { close MDSI; - - + + foreach my $bf (@bimfli_files) { my $bfile = $bf; $bfile =~ s/.bim$//; @@ -2382,7 +2382,7 @@ if ($sep_pca) { my $found = 0; - + die $!." <$rootdir/$famloc>" unless open IN, "< $rootdir/$famloc"; while (my $line = ){ my @cells = &split_line($line); @@ -2396,19 +2396,19 @@ if ($sep_pca) { close IN; if ($found == 0) { print "this fam-file without any IDs in analysis (--mds): ".$bf."\t".$famloc."\n" if ($debug); - - } - + + } + # if (exists $sep_covfile{$bfile}){ # -# print "$bfile\n"; +# print "$bfile\n"; # } } @bimfli_files = @bimfli_files_tmp2; # print "nbfile: ".@bimfli_files."\n"; - + } @@ -2433,12 +2433,12 @@ if ($sep_pca) { unless (-e "$rootdir/daner_$outname.done") { unless ($result_name){ - + ## walk through chunks foreach my $refind (@refind_arr) { - - + + my @dasifiles = (); # print @bimfli_files."\n"; @@ -2447,19 +2447,19 @@ unless (-e "$rootdir/daner_$outname.done") { foreach (@bimfli_files) { - + my $bfile = $_; my $co_str_loc = $co_str; $co_str_loc = "" if (exists $trioset_bimfli{$bfile}); - + $bfile =~ s/.bim$//; my $danerdir_loc = "$danerdir/da_$bfile"; my $hg19_remove = 0; - + my $dasudir_loc = "$dasudir"."_$bfile"; my $dasudir_loc_qc1 = "$dasudir"."qc1_$bfile"; @@ -2476,7 +2476,7 @@ unless (-e "$rootdir/daner_$outname.done") { } } -# print "$dasudir_loc\n"; +# print "$dasudir_loc\n"; # print "$dasudir_loc_qc1\n"; # sleep(3); # my $dasudir_loc_qc1_hg19 = "$dasudir"."qc1_$bfile"; @@ -2495,10 +2495,10 @@ unless (-e "$rootdir/daner_$outname.done") { if (-e $dasudir_loc_qc1) { $dasudir_loc = "$dasudir_loc_qc1/qc1"; - } + } -# print "$dasudir_loc\n"; -# print "$dasudir_loc_qc1/qc1\n"; +# print "$dasudir_loc\n"; +# print "$dasudir_loc_qc1/qc1\n"; my $empty_file = "$bfile.$refind.empty"; if (exists $empty_files{$empty_file}) { @@ -2554,13 +2554,13 @@ unless (-e "$rootdir/daner_$outname.done") { } else { push @danerdir_arr, $sys_loc ; - + ## NEW !!!!!! # if (@danerdir_arr > 5000) { # last; # } - - + + } } else { @@ -2574,7 +2574,7 @@ unless (-e "$rootdir/daner_$outname.done") { } } - + } } @@ -2582,13 +2582,13 @@ unless (-e "$rootdir/daner_$outname.done") { if ($danscore_neg == 0) { -# print "score bfile: $_, $refind\n"; +# print "score bfile: $_, $refind\n"; if ($danscore_file ne "") { - + my $danscoredir_loc = "$danscoredir/dsc_$bfile"; - + if ($bgscore) { - + my $bfile_bg = "$bfile.bgn"; my $bg_bed = "$bgdir/$bfile_bg.bed"; @@ -2596,26 +2596,26 @@ unless (-e "$rootdir/daner_$outname.done") { unless (-e $bg_bed) { print "Warning: best_guess dataset not existing: $bg_bed\n" if ($debug); $bfile_bg =~ s/.ch.fl.bgn$/.bgn/; - + $bg_bed = "$bgdir/$bfile_bg.bed"; unless (-e $bg_bed) { print "Error: best_guess dataset not existing: $bg_bed\n"; exit; } - + } my $workname = "score.$bfile_bg"; - - unless (exists $bg_in{$bfile_bg}) { + + unless (exists $bg_in{$bfile_bg}) { unless (-e "$danscoredir_loc/R.$workname.hl_nw.Rin"){ my $sys_loc = "$daner_script --score $danscore_file --range $range_file --nosi --indir $bgdir $idex_str $addcov_str $idin_str $pheno_str $co_str_loc --outdir $danscoredir_loc --bfile $bfile_bg nouse"; - + push @danscore_arr, $sys_loc; print "$sys_loc\n" if ($debug); print "$danscoredir_loc/R.$workname.hl_nw.Rin\n" if ($debug); - + $bg_in{$bfile_bg} = 1; } } @@ -2625,23 +2625,23 @@ unless (-e "$rootdir/daner_$outname.done") { if ($hg19_remove) { $ident =~ s/.hg19.ch.fl/.ch.fl/; } - - + + ### getting danscore_jobs unless (-e "$danscoredir_loc/dan_$ident.profile.log"){ - + my $take = 1; if ($trioset_file){ unless (exists $trioset{$bfile}) { $take = 0; } } - + if ($take == 1) { my $mapfile = "$dasudir_loc/dos_$ident.out.dosage.map"; if (-e $mapfile) { push @danscore_arr, "$daner_script --score $danscore_file --range $range_file --nosi --indir $dasudir_loc $idex_str $addcov_str $idin_str $pheno_str $co_str_loc --outdir $danscoredir_loc $ident"; - + } else { print "$mapfile not existing, maybe empty\n"; @@ -2656,19 +2656,19 @@ unless (-e "$rootdir/daner_$outname.done") { if ($bgscore) { last; } - - + + ### META-disease my @dadimeta_arr = (); if ($n_cdg > 1) { foreach my $cx (keys %cdg_dis) { - + # my @dasidifiles = grep {/\/dan_qc2report_$cx/} @dasifiles; my @dasidifiles = (); - + my @tmp_arr = @{$dadimeta_bimfli_files {$cx}}; - - + + foreach my $dsf (@dasifiles){ my $dsf_short = $dsf; $dsf_short =~ s!.*/dan_!!; @@ -2683,7 +2683,7 @@ unless (-e "$rootdir/daner_$outname.done") { my $dameta_out = "$dadimetadir/dan_$refind.assoc.dosage.meta.ngt"; unless (-e "$dameta_out.metadaner.gz") { my $sys = "$metaber_script --info_th $info_th $no_neff_filter_txt $xdf_txt --out $dameta_out @dasidifiles"; - push @daner_dimeta_arr, $sys; + push @daner_dimeta_arr, $sys; } push @ {$dadimeta_files {$cx}}, "$dameta_out.metadaner.gz"; push @dadimeta_arr, "$dameta_out.metadaner.gz"; @@ -2698,8 +2698,8 @@ unless (-e "$rootdir/daner_$outname.done") { unless (-e "$dameta_out.metadaner.gz") { my @dasifiles_loc = @dasifiles; @dasifiles_loc = @dadimeta_arr if ($n_cdg > 1); - - + + push @daner_meta_arr, "$metaber_script --info_th $info_th $no_neff_filter_txt $xdf_txt --out $dameta_out @dasifiles_loc"; # print "metaber5 $xdf_txt --out $dameta_out @dasifiles_loc\n"; # exit; @@ -2707,11 +2707,11 @@ unless (-e "$rootdir/daner_$outname.done") { else { $daner_fini_meta_count++; } - + my $dameta_out_short = "dan_$refind.assoc.dosage.meta.ngt"; push @dameta_files, "$dameta_out_short.metadaner.gz"; } - + # exit; last if ($test); } @@ -2735,7 +2735,7 @@ unless (-e "$rootdir/daner_$outname.done") { if (@danscore_arr > 0) { - + $sjadir = $danerjobdir; $sjaname = "danscore"; $sjatime = 1; @@ -2745,7 +2745,7 @@ if (@danscore_arr > 0) { $sjamem = 4000; } @sjaarray = @danscore_arr; - + &send_jobarray; } @@ -2775,7 +2775,7 @@ if ($bgscore) { print "Error: pdf not existing: $pdf_tmp\n"; exit; } - + } &mysystem ("cp $danscoredir_loc/$bfile_long.pdf $distdir"); @@ -2783,7 +2783,7 @@ if ($bgscore) { &mysystem ("cp $danscoredir_loc/$bfile_long.tar.gz $distdir"); &mysystem ("cp $danscoredir_loc/$bfile_short.fam.pt.ex $distdir"); &mysystem ("cp $danscoredir_loc/R.$bfile_long.hl_nw.Rin $distdir"); - + } # print "debug\n"; @@ -2794,14 +2794,14 @@ if ($bgscore) { ############################################################# ## SUCCESSSS ############################################################# - + $sjadir = $rootdir; $sjaname = "finished"; push @sjaarray, "tmp"; $sjatime = 2; $sjamem = 1000; - - + + &send_jobarray; @@ -2831,7 +2831,7 @@ if ($danscore_file ne "") { &mysystem ("cp $rootdir/$mds_name_sub_mds . "); } $walltime = 1; - my $nreffiles = @refind_arr; + my $nreffiles = @refind_arr; my $trio_sw = ""; $trio_sw = "--trio" if ($trioset_file); my $sys = "$danscore_result_script --nref $nreffiles --out $outname --mds $mds_name_sub_mds $trio_sw"; @@ -2852,7 +2852,7 @@ if ($danscore_file ne "") { $sjatime = 2; $sjamem = 5000; @sjaarray = @dsc_arr; - + &send_jobarray; @@ -2871,7 +2871,7 @@ if ($danscore_file ne "") { - + print "copy successfull\n" if ($debug); } print "count SNPs\n" if ($debug) ; @@ -2909,14 +2909,14 @@ if ($danscore_file ne "") { &mysystem ("rm score.map.tmp") if (-e "score.map.tmp"); foreach (@bimfli_files) { my $bfile = $_; - - + + my $dasudir_loc = "$dasudir"."_$bfile"; my $dasudir_loc_qc1 = "$dasudir"."qc1_$bfile"; if (-e $dasudir_loc_qc1) { $dasudir_loc = "$dasudir_loc_qc1/qc1"; } - + foreach my $refind (@refind_arr) { my $ident = "$bfile.$refind"; my $mapfile = "$dasudir_loc/dos_$ident.out.dosage.map"; @@ -2925,7 +2925,7 @@ if ($danscore_file ne "") { } &mysystem ("mv score.map.tmp score.map"); - + } my $sysloc = "sort -k1,1 score.map | uniq > score.map.sorted"; @@ -2944,7 +2944,7 @@ if ($danscore_file ne "") { my @pth = qw /0.00000005 0.000001 0.0001 0.001 0.01 0.05 0.1 0.2 0.5 1.0/; # print "read sorted\n"; - + die $!." <$danscore_file.sorted.inref.sorted>" unless open MS, "< $danscore_file.sorted.inref.sorted"; # my $header = ; while (my $line = ){ @@ -2984,17 +2984,17 @@ if ($danscore_file ne "") { ############################################################# ## SUCCESSSS ############################################################# - + $sjadir = $rootdir; $sjaname = "finished"; push @sjaarray, "tmp"; $sjatime = 2; $sjamem = 1000; - - + + &send_jobarray; - + exit; } } @@ -3003,39 +3003,39 @@ if ($danscore_file ne "") { #exit; if (@danerdir_arr > 0) { - + $sjadir = $danerjobdir; $sjaname = "daner"; $sjatime = 2; $sjamem = 1000; @sjaarray = @danerdir_arr; - + &send_jobarray; } #exit; if (@danerdir_arr_long > 0) { - + $sjadir = $danerjobdir; $sjaname = "danerlong"; $sjatime = 2; $sjaweek = 1; $sjamem = 16000; @sjaarray = @danerdir_arr_long; - + &send_jobarray; } if (@daner_dimeta_arr > 0) { - + $sjadir = $danerjobdir; $sjaname = "dadimeta"; $sjatime = 2; $sjamem = 1000; @sjaarray = @daner_dimeta_arr; - + &send_jobarray; } @@ -3043,13 +3043,13 @@ if (@daner_dimeta_arr > 0) { if (@daner_meta_arr > 0) { - + $sjadir = $danerjobdir; $sjaname = "dameta"; $sjatime = 2; $sjamem = 1000; @sjaarray = @daner_meta_arr; - + &send_jobarray; } @@ -3173,7 +3173,7 @@ unless (-e "$rootdir/resmeta_$outname.done") { my %refc; my $rf_count_max = 0; - + foreach my $rf (@result_files) { $rf =~ s/.gz$//; my $outdir = "$resdandir/".$rf; @@ -3237,8 +3237,8 @@ unless (-e "$rootdir/resmeta_$outname.done") { print "Error: $outdir/$rf.count\nhas too many entries,\nplease check other count files, maybe restart from scratch\n"; print "----------------------------------------------\n"; exit; - } - + } + } } } @@ -3288,13 +3288,13 @@ else { #} if (@chunker_arr > 0) { - + $sjadir = $resdandir; $sjaname = "chunk"; $sjatime = 2; $sjamem = 1000; @sjaarray = @chunker_arr; - + &send_jobarray; } @@ -3308,13 +3308,13 @@ if (@chunker_arr > 0) { #} if (@result_meta_arr > 0) { - + $sjadir = $reportdir; $sjaname = "resmet"; $sjatime = 2; $sjamem = 1000; @sjaarray = @result_meta_arr; - + &send_jobarray; } @@ -3386,8 +3386,8 @@ foreach my $cd (@daner_chunkdirs) { # print "check daner_summing\n"; chdir ($danerdir); - - + + #print "dameta_files: @dameta_files\n"; ##### META my @zehn = grep {/chr10_10/} @dameta_files; @@ -3395,7 +3395,7 @@ if (@zehn == 0 ) { @zehn = @dameta_files; } unless (-e "$reportdir/$daner_meta") { - unless ($cox) { + unless ($cox) { print "concatenate meta-results\n" if ($debug); if (@dameta_files == 0){ print "****\nError: no dameta files in list\n"; @@ -3424,18 +3424,18 @@ unless (-e "$reportdir/$daner_meta") { # &mysystem ($sys6); # chdir ($danerdir); - + ######################################## ######## here the single chromosomes ######################################### - - + + } } - + ##### MEGA unless ($nomega) { my @zehn = grep {/chr10_10/} @daner_files; @@ -3446,8 +3446,8 @@ unless ($nomega) { die "error, since missing: $zehn[0]" unless (-e $zehn[0]); print "concatenate $zehn[0]\n" if ($debug); # &mysystem ("rm $zehn[0]"); - - + + &mysystem ("gunzip -c $zehn[0] | head -1 | gzip -c > $reportdir/$daner_all"); &mysystem ("gunzip -c @daner_files | grep -v SNP | gzip -c >> $reportdir/$daner_all"); } @@ -3457,38 +3457,38 @@ unless ($nomega) { ### META-disease if ($n_cdg > 1) { foreach my $cx (keys %cdg_dis) { - unless ($cox) { + unless ($cox) { die "something wrong with disease" unless exists $dadimeta_files{$cx}; my @dadimeta_files_loc = @ { $dadimeta_files{$cx} } ; - + my @zehn = grep {/chr10_10/} @dadimeta_files_loc; if (@zehn == 0 ) { @zehn = @dadimeta_files_loc; } unless (-e "$reportdir/$daner_meta_pf.$cx.gz") { print "concatenate $zehn[0]\n"; - + ### have to work on metadisease!!!! print "debug, work on metadisease\n"; exit; my $sys_loc = "$damecat_script $danerdir $reportdir/$daner_meta_pf.$cx.gz @dadimeta_files_loc"; push @dametacat_arr, $sys_loc; - + } - + push @daner_cdg_coll, "$daner_meta_pf.$cx.gz"; } - + } } #exit; - - + + # foreach (@bimfli_files) { # print "bifli: $_\n"; # } - + ## single studies unless ($result_name){ foreach (@bimfli_files) { @@ -3498,10 +3498,10 @@ unless ($result_name){ my $danerdir_loc = "$danerdir/da_$bfile"; chdir ($danerdir_loc) or die "no danerdir_loc"; my $daner_bim = "daner_$bfile.gz"; - + push @daner_collect, $daner_bim; push @daner_collect_si, $daner_bim; - + unless (-e "$reportdir/$daner_bim") { opendir(DIR, ".") || die "can't opendir .: $!"; my @dalofiles = grep {/dan_.*assoc.dosage.ngt.gz$/} readdir(DIR); @@ -3509,19 +3509,19 @@ unless ($result_name){ my @zehn = grep {/chr10_10/} @dalofiles; if (@zehn == 0 ) { @zehn = @dalofiles; - } + } print "$zehn[0]\n" if ($debug); my $sys_loc = "$damecat_script $danerdir_loc $reportdir/$daner_bim @dalofiles"; push @dametacat_arr, $sys_loc; } - - + + ######################################## ######## here the single chromosomes ######################################### - - + + } } } @@ -3539,13 +3539,13 @@ chdir ($danerdir); if (@dametacat_arr > 0) { - + $sjadir = $danerjobdir; $sjaname = "damecat"; $sjatime = 1; $sjamem = 1000; @sjaarray = @dametacat_arr; - + &send_jobarray; } @@ -3563,7 +3563,7 @@ if ($nomega) { unless ($nohet_sw) { unless (-e "$reportdir/$daner_all_het") { - unless ($cox) { + unless ($cox) { print "create het_p_result\n" if ($debug); my $sys_loc = "$danerhet2p_script $reportdir/$daner_all $reportdir/$daner_all_het"; push @daner_het2p_arr, $sys_loc; @@ -3571,15 +3571,15 @@ unless (-e "$reportdir/$daner_all_het") { } } } - + if (@daner_het2p_arr > 0) { - + $sjadir = $danerjobdir; $sjaname = "daner_het2p"; $sjatime = 1; $sjamem = 1000; @sjaarray = @daner_het2p_arr; - + &send_jobarray; } @@ -3617,8 +3617,8 @@ if ($polyfrom_file ne "") { &mysystem("echo $sys | $blueprint_script --njob $job_bn_th -b \"prefix\" -j --na clump_$outname"); - - + + my $now = localtime time; my $message = $info_txt."\tclump\t$now"; @@ -3659,13 +3659,13 @@ unless (-e "placeholder.pdf"){ if (0) { my $scameta_out = "$daner_meta.pdf"; - + unless (-e $daner_meta){ &mysystem ("$blueprint_script --njob $job_bn_th -b \"~/plink/plink --out $daner_meta_pf --meta-analysis @daner_collect_si; gzip -f $daner_meta_pf.meta\" --wa 1 -j --di --na dameta_$outname; \n"); &reinvo_b ("dameta","$reportdir/blueprint_joblist_file-dameta_$outname"); } } - + ################################### ### META Disease Groups ################################### @@ -3680,7 +3680,7 @@ if (0) { my @starr = split '_' ,$_; $cdg_dis{$starr[2]} = 1; } - my @cdg_arr = (); + my @cdg_arr = (); foreach my $cx (keys %cdg_dis) { my @daner_collect_si_cx = grep {/^daner_qc2report_$cx/} @daner_collect_si; @@ -3736,12 +3736,12 @@ if (0) { # print OUT "\t$cells[11]"; print OUT "\t-"; print OUT "\t$cells[8]"; - print OUT "\t$cells[12]"; + print OUT "\t$cells[12]"; print OUT "\t$cells[6]"; print OUT "\t-"; print OUT "\n"; } - + close IN; close OUT; &mysystem ("gzip $daner_meta_nogz"); @@ -3770,10 +3770,10 @@ if (0){ unless (-e $daner_pf_cx_gz){ &mysystem ("gunzip $daner_meta_pf_cx_gz"); - + die $!." <$daner_meta_pf_cx>" unless open IN, "< $daner_meta_pf_cx"; die $! unless open OUT, "> $daner_pf_cx"; - + while (my $line = ){ my @cells = &split_line($line); print OUT "$cells[0]"; @@ -3788,18 +3788,18 @@ if (0){ # print OUT "\t$cells[11]"; print OUT "\t-"; print OUT "\t$cells[8]"; - print OUT "\t$cells[12]"; + print OUT "\t$cells[12]"; print OUT "\t$cells[6]"; print OUT "\t-"; print OUT "\n"; } - + close IN; close OUT; &mysystem ("gzip $daner_meta_pf_cx"); &mysystem ("gzip $daner_pf_cx"); - + } } } @@ -3902,13 +3902,13 @@ foreach my $dc_loc (@daner_allcoll) { if (@lth_arr > 0) { - + $sjadir = $reportdir; $sjaname = "lth"; $sjatime = 1; $sjamem = 1000; @sjaarray = @lth_arr; - + &send_jobarray; } @@ -3961,7 +3961,7 @@ unless (-e $prekno_file) { } unless (-e $prekno_file) { - + print "Error: cannot find gwas catalog file\n"; exit; } @@ -4000,13 +4000,13 @@ foreach my $dc_loc (@daner_allcoll) { if (@areat_arr > 0) { - + $sjadir = $reportdir; $sjaname = "areator"; $sjatime = 1; $sjamem = 3000; @sjaarray = @areat_arr; - + &send_jobarray; } @@ -4089,7 +4089,7 @@ while (my $line = ){ my @cells = &split_line($line); my $pv_loc = shift(@cells); my $a_outname = $cells[3]; - + if ($pv_loc eq "0") { push @areap_files, "placeholder.pdf"; } @@ -4109,13 +4109,13 @@ close FILE; if (@areap_arr > 0) { - + $sjadir = $reportdir; $sjaname = "areaplot"; $sjatime = 1; $sjamem = 1000; @sjaarray = @areap_arr; - + &send_jobarray; } @@ -4143,16 +4143,16 @@ if ($regplot_file) { } } close FILE; - - + + if (@regplot_arr > 0) { - + $sjadir = $reportdir; $sjaname = "regplot"; $sjatime = 1; $sjamem = 1000; @sjaarray = @regplot_arr; - + &send_jobarray; } } @@ -4229,13 +4229,13 @@ close FILE; #exit; if (@forestp_arr > 0) { - + $sjadir = $reportdir; $sjaname = "forestplot"; $sjatime = 1; $sjamem = 1000; @sjaarray = @forestp_arr; - + &send_jobarray; } @@ -4259,9 +4259,9 @@ if ($regplot_file) { &mysystem ("$pdfjoin_script --quiet --rotateoversize false --outfile $regplot_collection @regplot_files"); &mysystem ("gzip $regplot_collection"); &mysystem ("cp $regplot_collection.gz $distdir"); - + } - + } @@ -4275,7 +4275,7 @@ if (@forestp_files > 0) { &mysystem ("$pdfjoin_script --quiet --rotateoversize false --outfile $forest_plot_collection @forestp_files"); # &mysystem ("gzip $forest_plot_collection"); &mysystem ("cp $forest_plot_collection $distdir"); - + } } #sleep(10); @@ -4287,7 +4287,7 @@ if (@forestphet_files > 0) { &mysystem ("$pdfjoin_script --quiet --rotateoversize false --outfile $forest_plot_collection_het @forestphet_files"); &mysystem ("gzip $forest_plot_collection_het"); &mysystem ("cp $forest_plot_collection_het.gz $distdir"); - + } } @@ -4399,8 +4399,8 @@ foreach my $dc_loc (@daner_allcoll) { &mysystem ("touch manhattan.$dc_loc.nog2.pdf.empty"); print "empty: manhattan.$dc_loc.nog.pdf.empty\n" if ($debug); } - - + + unless (-e "manhattan.$dc_loc.nog.pdf") { my $sys4 = "$manhattanplot2_script --maxy 32 $p_gene_txt_nog --title Manhattan-Plot --sig-gwa --cols 2,11,1,3 -areator $m1mhcfile --out $dc_loc.nog $infile"; unless (-e "manhattan.$dc_loc.nog.pdf.empty") { @@ -4417,7 +4417,7 @@ foreach my $dc_loc (@daner_allcoll) { } # } - + @@ -4457,30 +4457,30 @@ if (0) { foreach (@bimfli_files) { my $bfile = $_; $bfile =~ s/.bim$//; - + my $daner_bim = "daner_$bfile.gz"; - + # push @sgwa_files, "s.$outname.$bfile"."_gwa.pdf"; push @sqq_files, "s.$outname.$bfile"."-qq.pdf"; - + my $title_loc = $bfile; $title_loc =~ s/qc2report//; - + push @qq_arr, "$qqplot_script --ac 100 --title QQ-plot-$title_loc.ac --cacohead -p 11 --out s.$outname.$bfile --ceiling 12 $daner_bim" unless (-e "s.$outname.$bfile"."-qq.pdf"); # push @gwa_arr, "gwa_plot --cols 2,11,1,3 --sig-gwa --title s.$outname.$bfile $daner_bim" unless (-e "s.$outname.$bfile"."_gwa.pdf"); - + } } if (@gwa_arr > 0) { - + $sjadir = $reportdir; $sjaname = "manhplot"; $sjatime = 1; $sjamem = 1000; @sjaarray = @gwa_arr; - + &send_jobarray; } @@ -4527,13 +4527,13 @@ unless (-e "$distdir/$areator_all_het.xls"){ if (@qq_arr > 0) { - + $sjadir = $reportdir; $sjaname = "qqplot"; $sjatime = 1; $sjamem = 3000; @sjaarray = @qq_arr; - + &send_jobarray; } @@ -4585,12 +4585,12 @@ if ($gwclump) { if ($serial) { $sys_tmp = "$clumpnavi2_script --noindel --pfile $daner_all --hq_f .05 --hq_i .9 --out $daner_all --clu_p1 1.0 --clu_p2 1.0 --clu_window 500 --clu_r2 0.1 --refdir $areat_refdir --serial --sepa $sepa"; } - + my $bsys = $sys_tmp; - - + + unless (-e "$daner_all.clumped.xmhc.gz") { - + unless ($noclumper) { if (1) { @@ -4601,7 +4601,7 @@ if ($gwclump) { my $sys_re = "$blueprint_script -b \"$bsys\" --wa 2 --di -j --na _cl_$daner_all"; &mysystem ($sys_re); } - + # &mysystem ($bsys); } else { @@ -4613,27 +4613,28 @@ if ($gwclump) { } if ($noti == 1) { - + &mysystem ('cat '.$areator_all.'.summary | '.$mutt_script.' -s Daner_results_ready_'.$outname.' '.$email) ; } - + &mysystem ("$sb_script manhattan.nog.$outname.pdf") ; &mysystem ("$sb_script $areator_all.xls") ; - + # &mysystem ("send_dropbox manhattan.nog.$outname.pdf") ; # &mysystem ("send_dropbox $areator_all.xls") ; - + } } } - + } print "debug4\n" if ($debug); my $numbers_file = "basic.$outname.num.xls"; unless (-e "$distdir/$numbers_file") { + chdir "$reportdir"; my $sys = "$numbers_script basic.$outname $daner_all @daner_single"; # print "####################################### DEBUG ###############\n"; @@ -4663,18 +4664,18 @@ if ($onlymeta) { unless (-e "$distdir/$daner_all") { &mysystem ("ln -sf $reportdir/$daner_meta $distdir/$daner_all"); } - - + + unless (-e "replic/README") { &mysystem ("mkdir replic") unless (-e "replic"); chdir ("replic"); - + &mysystem ("$linksub_script $distdir/$areator_all") ; &mysystem ("$linksub_script $distdir/$daner_all.p3.gz") ; - + my $repl_txt = "$replicator_script --dan --areator $areator_all --gwas $daner_all.p3.gz --format 7 --refdir $areat_refdir --index --trust --rep REPFILE --nca XXX --nco XXX"; - + &mysystem ("echo $repl_txt > README") ; } } @@ -4704,30 +4705,30 @@ my $ldsc_tar = "$daner_all.ldsc.tar.gz"; unless ($noldsc) { my @ldsc_arr = (); - - + + my $ldsc_fini = "$daner_all.ldsc.fini"; push @ldsc_arr, "$ldsc_script $daner_all" unless (-e $ldsc_fini); - - + + if (@ldsc_arr > 0) { - + $sjadir = $reportdir; $sjaname = "ldsc"; $sjatime = 1; $sjamem = 4000; @sjaarray = @ldsc_arr; - + &send_jobarray; } - - + + unless (-e "$distdir/ldsc.$daner_all.tar.gz") { - - + + &mysystem ("cp $daner_all.ldsc.sumstats.gz $distdir/$daner_all.ldsc.sumstats.gz"); &mysystem ("cp ldsc.$daner_all.tar.gz $distdir/ldsc.$daner_all.tar.gz"); - + } } @@ -4818,25 +4819,25 @@ if (0) { foreach (@bimfli_files) { my $bfile = $_; $bfile =~ s/.bim$//; - + my $daner_bim = "daner_$bfile.gz"; - - + + my $laout = "s.$outname.$bfile"."_ngt"; my $laout_pdf = "$laout"."_lama-page1.pdf"; push @lahu_pdf_arr, $laout_pdf; push @lahu_arr, "$lahunt_script -pcol 11 --best $best_lahunt --c1 12,1 --out $laout $daner_bim" unless (-e $laout_pdf); - + $laout = "s.$outname.$bfile"."_info"; $laout_pdf = "$laout"."_lama-page1.pdf"; push @lahu_pdf_arr, $laout_pdf; push @lahu_arr, "$lahunt_script -pcol 11 --best $best_lahunt --c1 8,10 --out $laout $daner_bim" unless (-e $laout_pdf); - + $laout = "s.$outname.$bfile"."_frqa"; $laout_pdf = "$laout"."_lama-page1.pdf"; push @lahu_pdf_arr, $laout_pdf; push @lahu_arr, "$lahunt_script -pcol 11 --best $best_lahunt --c1 6,10,f --out $laout $daner_bim" unless (-e $laout_pdf); - + $laout = "s.$outname.$bfile"."_frqu"; $laout_pdf = "$laout"."_lama-page1.pdf"; push @lahu_pdf_arr, $laout_pdf; @@ -4848,7 +4849,7 @@ if (0) { #if (1) { # if (@lahu_arr > 0) { # &a2filenew ("lahu_job_list", @lahu_arr); - + # my $lc = @lahu_arr; # my $jmem_loc = $jmem * 2; # &mysystem ("cat lahu_job_list | blueprint --njob $job_bn_th -b \"prefix\" --mem $jmem_sm --wa 2 -j --i 8,2 --na lahu_$outname"); @@ -4864,31 +4865,31 @@ if (0) { unless ($nolahunt){ if (@lahu_arr > 0) { - + $sjadir = $reportdir; $sjaname = "lahu"; $sjatime = 1; $sjamem = 2000; @sjaarray = @lahu_arr; - + &send_jobarray; } - + if (1) { unless (-e "$distdir/report.qc.$outname.pdf.gz") { - + #### lahunts &mysystem ("$pdfjoin_script --quiet --rotateoversize false --outfile lahunt.$outname.pdf @lahu_pdf_arr"); # &mysystem ("pdfjoin --outfile lahunt.$outname.pdf @lahu_pdf_arr $scameta_out"); &mysystem ("$pdfnup_script --quiet --nup 2x2 lahunt.$outname.pdf"); - + &mysystem ("gzip -f lahunt.$outname-nup.pdf"); &mysystem ("mv lahunt.$outname-nup.pdf.gz report.qc.$outname.pdf.gz"); &mysystem ("cp report.qc.$outname.pdf.gz $distdir"); - + } } } @@ -4910,7 +4911,7 @@ push @sjaarray, "tmp"; $sjatime = 2; $sjamem = 1000; - + &send_jobarray; @@ -5054,7 +5055,7 @@ unless (-e "$areator_all.summary") { } if ($noti == 1) { - + &mysystem ('cat '.$areator_all.'.summary | '.$mutt_script.' -s Daner_results_ready_'.$outname.' '.$email) ; } @@ -5118,7 +5119,7 @@ push @sjaarray, "tmp"; $sjatime = 2; $sjamem = 1000; - + &send_jobarray; diff --git a/rp_bin/refdir_navi b/rp_bin/refdir_navi index efa4cba..3a36f52 100755 --- a/rp_bin/refdir_navi +++ b/rp_bin/refdir_navi @@ -36,7 +36,7 @@ srand(0); # # my.wget (vcf_download): # - download vcf and genetic maps -# +# # # my.prepvcf_filtnorm (bcftools_filtnorm): # - create filtnorm-vcf with mac > 4 and normalize multiallelic SNPs @@ -64,7 +64,7 @@ srand(0); # # legend_chunk (chunk) # - define chunks with --nsnps out of filtnorm positions -# +# # pobed (plink_anc) # - separate plink binaries for each ancestry including frequencies # @@ -171,7 +171,7 @@ my $job_bn_th = 1000; my $chunksnps = 30000; ## min number of SNPs in each chunk use Getopt::Long; -GetOptions( +GetOptions( "out_templ=s"=> \$out_templ, "chunksnps=i"=> \$chunksnps, "sample_root=s"=> \$sample_root, @@ -199,7 +199,7 @@ GetOptions( "walltimeplus=i"=> \$walltimeplus, "serial"=> \my $serial_sw, "sepa=i"=> \$sepa, - + ); @@ -230,12 +230,12 @@ my $prepmm_script_m3vcf = "prepare_ref_mm_m3vcf"; ### my.pipeline_tar my $vcf2bcf_script = "vcf2bcf"; ### my.pipeline_tar my $filtnorm_translate_script = "filtnorm_translate_changes"; ### my.pipeline_tar my $filtnorm_filter_script = "filtnorm_filter"; ### my.pipeline_tar -#my $vcf_script = "$vloc/vcftools"; ### -my $annot_hg19pos_script = "annot_hg19pos"; ### +#my $vcf_script = "$vloc/vcftools"; ### +my $annot_hg19pos_script = "annot_hg19pos"; ### my $impute2beagle_script = "impute2beagle2"; ### not needed for now my $prepare_hm_ref_2_script = "prepare_hm_ref_2"; ### my.pipeline_tar my $beagle2plink_script = "beagle2plink"; ### not needed for now -my $plink_script = "$ploc/plink"; ### +my $plink_script = "$ploc/plink"; ### my $ref2subchr2_script = "ref2subchr2"; ### my.pipeline_tar my $beagle2impute_script = "beagle2impute"; ### my.pipeline_tar my $floc2sumfrq_script = "my.floc2sumfrq"; ### my.pipeline_tar @@ -276,7 +276,7 @@ my $err_scr = 0; die $! unless open FILE1, "> get_scripts_on_broad.txt"; foreach my $scr_name (@test_scripts) { my $scr_path = ''; - + for my $path ( split /:/, $ENV{PATH} ) { if ( -f "$path/$scr_name" && -x _ ) { print "$scr_name\tfound in $path\n" if ($debug); @@ -289,7 +289,7 @@ foreach my $scr_name (@test_scripts) { # print FILE1 "cp /home/unix/sripke/bin/$scr_name ./\n"; print "!!Error!! : No $scr_name command available\n" ; } - + } close FILE1; if ($err_scr == 1) { @@ -308,7 +308,7 @@ my $err_scr = 0; my $noti = 1; { my $scr_path = ''; - + for my $path ( split /:/, $ENV{PATH} ) { if ( -f "$path/$mutt_script" && -x _ ) { print "$mutt_script\tfound in $path\n" if ($debug); @@ -335,7 +335,7 @@ my $noti = 1; # sleep (3); } } - + } die if $err_scr == 1; @@ -358,12 +358,12 @@ version: $rp_version --outname STRING just as identifier --sample_root STRING file with identifiers and sex and ancestry, e.g. integrated_call_samples_v3.20130502.ALL.panel --out_templ STRING outname as temlate, e.g. ALL.august.chrXXX.recal.vrcut.EUR.beagle.vcf.imp - --vcf_templ STRING vcf template, - --vcf_site STRING ftp site containing vfc files, + --vcf_templ STRING vcf template, + --vcf_site STRING ftp site containing vfc files, - --mach_templ STRING mach template, 20101123.chrXXX, pointing to 20101123.chrXXX.hap.gz, 20101123.chrXXX.map and chrXXX.annotation.txt + --mach_templ STRING mach template, 20101123.chrXXX, pointing to 20101123.chrXXX.hap.gz, 20101123.chrXXX.map and chrXXX.annotation.txt - --imp2_templ STRING impute2 template, 20101123.chrXX, pointing to 20101123.chrXXX.hap.gz, 20101123.chrXXX.legend.gz + --imp2_templ STRING impute2 template, 20101123.chrXX, pointing to 20101123.chrXXX.hap.gz, 20101123.chrXXX.legend.gz e.g. /humgen/1kg/processing/allPopulations_wholeGenome_august_release/calls/chrXXX/ALL.august.chrXXX.recal.vrcut.EUR.beagle.vcf @@ -382,7 +382,7 @@ version: $rp_version --subname STRING subchr_postfix - --mhc STRING take phased MHC reference: + --mhc STRING take phased MHC reference: T1DGC_REF.bgl.phased T1DGC_REF.markers -> --mhc T1DGC_REF --mhead INT discard INT header rows, defailt: $mhc_head @@ -394,16 +394,16 @@ version: $rp_version --bn_job INT submit INT jobs at a time --force1 do not exit if same fail, but do this only once - - --chunksnps INT minimum number of SNPs in each chunk (default: $chunksnps) + + --chunksnps INT minimum number of SNPs in each chunk (default: $chunksnps) --serial no sending jobs to queue all in one run - -> usually only used for testing + -> usually only used for testing --sepa INT use INT number of parallel jobs during serial - --walltimeplus INT increase walltime by INT hours (important for bigger reference files), - prep_minimac_m3vcf gets ten times this amount since minimac is now single threaded + --walltimeplus INT increase walltime by INT hours (important for bigger reference files), + prep_minimac_m3vcf gets ten times this amount since minimac is now single threaded @@ -427,7 +427,7 @@ for classic 1KG phase 3 use this (files will be downloaded after confirmation); ## outdated: -annotation of 1KG positions: +annotation of 1KG positions: /humgen/1kg/analysis/main_project/Aug2010_whole_genome_release/bc.bi.ncbi.um.2of4.nogenotypes.refgene_annotated.vcf /fg/debakkerscratch/ripke/hapmap_ref/1KG_got2d_0311/bc.bi.ncbi.um.2of4.nogenotypes.refgene_annotated.vcf.chr1 @@ -600,12 +600,12 @@ sub send_jobarray { $fini_message .= "##### reference_pipeline finished successfully:\n"; $fini_message .= "##### $sjainfotxt\n"; - $fini_message .= "##### have a look at the wiki page\n"; + $fini_message .= "##### have a look at the wiki page\n"; $fini_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n"; $fini_message .= "##################################################################\n"; print "$fini_message\n"; - + die $! unless open SUC, "> success_file"; print SUC $fini_message."\n"; close SUC; @@ -669,14 +669,14 @@ sub send_jobarray { if ($old_cmd =~ /$sjarow_part/){ unless ($force1 ){ my $err_message ; - + $err_message .= "##################################################################\n"; $err_message .= "##### Error: \n"; $err_message .= "##### step $sjaname has been done repeatedly without any progress\n"; $err_message .= "##### reference pipeline stopped\n"; $err_message .= "##### $sjacontent\n"; $err_message .= "##### if reason does not appear obvious\n"; - $err_message .= "##### have a look at the wiki page\n"; + $err_message .= "##### have a look at the wiki page\n"; $err_message .= "##### https://sites.google.com/a/broadinstitute.org/ricopili/\n"; $err_message .= "##### or contact the developers\n"; $err_message .= "##### version: $rp_version\n"; @@ -692,7 +692,7 @@ sub send_jobarray { if ($noti == 1) { &mysystem ('cat error_file | '.$mutt_script.' -s RP_refdir_error '.$email) ; } - + exit; } @@ -729,34 +729,34 @@ sub send_jobarray { @job_sepa_arr = (); } $jc++; - + } if (@job_sepa_arr > 0) { $jc--; push @job_sepa_arr, "wait"; - + my $sepa_file = "$sjaname.sepa.$jc"; &a2filenew ($sepa_file,@job_sepa_arr); print "sepa_file: ".$sepa_file."\n" if ($debug); &mysystem("chmod u+x $sepa_file"); &mysystem("./$sepa_file"); } - + } - else { + else { my $sys_loc = "$blue_script $sja_week_str --noerr --njob $nsja_loc --array $jobfile --wa $sjatime --mem $sjamem --j --na $jobfile $multi_txt"; &mysystem ($sys_loc); } - - - + + + $command_line =~ s/--force1//; my $wt_file = "$sjadir/j.$jobfile.id"; - chdir "$rootdir" or die "something strange"; + chdir "$rootdir" or die "something strange"; @@ -768,7 +768,7 @@ sub send_jobarray { } else { my $sys_re = "$blue_script --njob $job_bn_th -b \"$command_line\" --wa 4 --di -j --fwt $wt_file --na _ref_$outname"; - + &mysystem ($sys_re); } @@ -871,7 +871,7 @@ my $date = "$cells[1]"."_"."$cells[4]"; #my @refgene_files = `ls $subchr_dir/refGene*`; my @refgene_files = `ls refGene*`; if (@refgene_files == 0) { - + print "********************************\n"; print "no refGene file found\n"; print "********************************\n"; @@ -900,15 +900,15 @@ if (@refgene_files == 0) { if ($answer eq "y" || $answer eq "Y"){ - + print "downloading most recent file...\n"; # chdir $subchr_dir; my $sys = "wget -nv http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/refGene.txt.gz "; &mysystem ($sys); - + print "unzip...\n" if ($debug); - $sys = "gunzip -c refGene.txt.gz > refGene.txt"; + $sys = "gunzip -c refGene.txt.gz > refGene.txt"; &mysystem ($sys); print "reformat...\n" if ($debug); @@ -922,13 +922,13 @@ if (@refgene_files == 0) { print "backup...\n" if ($debug); $sys = "mv refGene.txt.gz bak.refGene.txt.gz"; &mysystem ($sys); - + print "clean...\n" if ($debug); $sys = "rm refGene.txt"; &mysystem ($sys); - - + + print "success with refGene file...\n"; print "press any key and to continue\n"; @@ -937,18 +937,18 @@ if (@refgene_files == 0) { $answer = lc <>; print "********************************\n"; print "********************************\n"; - - + + } else { print "go ahead and then restart the pipeline\n"; exit; } - + } @@ -977,7 +977,7 @@ chomp($refgene_file); unless (-e "gwascatalog.$date.rp.txt") { #my @cat_files = `ls gwascatalog.$date.rp.txt`; #if (@cat_files == 0) { - + print "********************************\n"; print "no gwascatalog.txt file found\n"; print "********************************\n"; @@ -1009,7 +1009,7 @@ unless (-e "gwascatalog.$date.rp.txt") { if ($answer eq "y" || $answer eq "Y"){ my $now = localtime time; - + my @cells = split /\s+/, $now; my $date = "$cells[1]"."_"."$cells[4]"; @@ -1020,13 +1020,13 @@ unless (-e "gwascatalog.$date.rp.txt") { # chdir $subchr_dir; my $sys = "wget -nv http://www.ebi.ac.uk/gwas/api/search/downloads/full "; &mysystem ($sys); - + # print "replace space...\n"; -# $sys = 'tr " " "_" < full > gwascatalog.txt.ow'; +# $sys = 'tr " " "_" < full > gwascatalog.txt.ow'; # &mysystem ($sys); - + print "reformat...\n" if ($debug); die $!."(full)" unless open IN, "< full"; die $!."(gwascatalog.txt.ow.short)" unless open OUT, "> gwascatalog.txt.ow.short"; @@ -1045,7 +1045,7 @@ unless (-e "gwascatalog.$date.rp.txt") { # } # if ($cells[11] eq "Y") { # $cells[11] = 24; -# } +# } print OUT $cells[21]; print OUT "\t".$cells[11]; ## chromosome print OUT "\t".$cells[12]; @@ -1059,12 +1059,12 @@ unless (-e "gwascatalog.$date.rp.txt") { } print "sort...\n" if ($debug); - $sys = 'sort -k2,2n -k3,3n -k5,5g gwascatalog.txt.ow.short > gwascatalog.'.$date.'.rp.txt'; + $sys = 'sort -k2,2n -k3,3n -k5,5g gwascatalog.txt.ow.short > gwascatalog.'.$date.'.rp.txt'; &mysystem ($sys); $sys = 'gzip -f full'; &mysystem ($sys); - -# print "debug gwascatalog.rp.txt\n"; + +# print "debug gwascatalog.rp.txt\n"; # exit; @@ -1079,13 +1079,13 @@ unless (-e "gwascatalog.$date.rp.txt") { $answer = lc <>; - + } else { print "go ahead and then restart the pipeline\n"; exit; } - + } @@ -1146,7 +1146,7 @@ unless (-e "$sample_root") { print "********************************\n"; print "--sample_root ($sample_root) not found, should I try to download at $vcf_site (y/n)?\n"; - my $sys = "wget -n $vcf_site/$sample_root "; + my $sys = "wget -nv $vcf_site/$sample_root "; my $answer = lc <>; chomp $answer; @@ -1163,7 +1163,7 @@ unless (-e "$sample_root") { print "exit now\n"; exit; } - + } print "********************************\n"; @@ -1194,7 +1194,7 @@ unless (-e "reference_templ") { close RT; } - + @@ -1216,7 +1216,7 @@ my %ca_hash; ## read out the collection of continental ancestries unless (-e $sample_ca) { print "read continental info from $sample_root\n" if ($debug); - + die $!."($sample_root)" unless open IN, "< $sample_root"; my $line = ; #1 @@ -1276,16 +1276,16 @@ unless (-e $sample_fini) { my $sex_col = -1; my $iid_col = -1; my $panel_sex = 1; - + foreach my $cc (0..$#cells){ if ($cells[$cc] eq "gender") { $sex_col = $cc; } if ($cells[$cc] eq "sample" || $cells[$cc] eq "IID") { $iid_col = $cc; - } + } } - + if ($sex_col == -1){ print "Warning: no gender header in $sample_root\n"; $panel_sex = 0; @@ -1295,7 +1295,7 @@ unless (-e $sample_fini) { exit; } - + while (my $line = ){ my @cells = @{&split_line_ref(\$line)}; @@ -1307,7 +1307,7 @@ unless (-e $sample_fini) { $sex = 1; } my $iid = $cells[$iid_col]; -# print "$iid has sex $sex\n"; +# print "$iid has sex $sex\n"; $sex_hash{$iid} = $sex; } @@ -1315,7 +1315,7 @@ unless (-e $sample_fini) { - + if ($sex_col == -1){ print "read sex of chr22-samples\n"; my @chr22_samples = `ls *chr22*samples`; @@ -1333,7 +1333,7 @@ unless (-e $sample_fini) { } } - + die $!."($chr22_samples[0])" unless open CHR, "< $chr22_samples[0]"; while (my $line = ){ @@ -1347,14 +1347,14 @@ unless (-e $sample_fini) { # exit; - - + + print "creating root population famfiles\n" if ($debug); my @famlines; die $!."($sample_root)" unless open IN, "< $sample_root"; die $!."($sample_fam)" unless open OUT, "> $sample_fam"; - + my $line = ; #1 my @cells = @{&split_line_ref(\$line)}; @@ -1370,9 +1370,9 @@ unless (-e $sample_fini) { } if ($cells[$cc] eq "sample" || $cells[$cc] eq "IID") { $iid_col = $cc; - } + } } - + if ($super_pop_col == -1){ print "Error: no super_pop header in $sample_root\n"; exit; @@ -1387,7 +1387,7 @@ unless (-e $sample_fini) { } - + while (my $line = ){ my @cells = @{&split_line_ref(\$line)}; my $spop_cell = $cells[$super_pop_col]; @@ -1403,7 +1403,7 @@ unless (-e $sample_fini) { print "Error: no underscores are allowed for population column\n"; print "when using a new file, please also deleted this file: $sample_ca\n"; exit; - } + } my $fid = $spop_cell."_".$pop_cell; my $iid = $cells[$iid_col]; my $sex; @@ -1415,7 +1415,7 @@ unless (-e $sample_fini) { exit; } - + my $out_line = $fid; $out_line .= "\t".$iid; $out_line .= "\t0"; @@ -1435,7 +1435,7 @@ unless (-e $sample_fini) { print "creating population famfile for $ca\n" if ($debug); die $!."($sample_fam.$ca)" unless open OUT, "> $sample_fam.$ca"; - + foreach my $line (@famlines) { my @cells = @{&split_line_ref(\$line)}; my @anc = split /_/, $cells[0]; @@ -1446,9 +1446,9 @@ unless (-e $sample_fini) { } close OUT; - + } - &mysystem ("touch $sample_fini"); + &mysystem ("touch $sample_fini"); # print "debug: ".@famlines."\n"; } @@ -1481,18 +1481,18 @@ my $answer_rename = ""; foreach my $chrint ($chr_start..$chr_end){ my $chr = $chrint; - + if ($chrint == 23) { $chr = "X"; } - - + + my $vcf_name = $vcf_templ; $vcf_name =~ s/XXX/$chr/g; my $vcf_name_loc; - + if (-e $vcf_name) { $vcf_name_loc = $vcf_name; } @@ -1511,7 +1511,7 @@ foreach my $chrint ($chr_start..$chr_end){ print "comment: thats expected for HRC imputation reference, so please say y\n"; print "comment: also it's recommend to start with a copy (or a link of the files)\n"; } - + if ($answer_rename eq "") { $answer_rename = lc <>; chomp $answer_rename; @@ -1525,7 +1525,7 @@ foreach my $chrint ($chr_start..$chr_end){ $samples_name_new =~ s/.haplotypes.vcf.gz$/.samples/; &mysystem ("mv $samples_name_old $samples_name_new"); push @backup_arr, "mv $samples_name_new $samples_name_old\n"; - + my $vcf_name_old = $vcf_found; my $vcf_name_new = $vcf_name; $vcf_name_old =~ s/.samples$/.haplotypes.vcf.gz/; @@ -1537,7 +1537,7 @@ foreach my $chrint ($chr_start..$chr_end){ $legend_name_new =~ s/.haplotypes.vcf.gz$/.legend.gz/; $legend_name_old =~ s/.samples$/.legend.gz/; &mysystem ("mv $legend_name_old $legend_name_new"); - push @backup_arr, "mv $legend_name_new $legend_name_old\n"; + push @backup_arr, "mv $legend_name_new $legend_name_old\n"; } @@ -1548,9 +1548,9 @@ foreach my $chrint ($chr_start..$chr_end){ if (@vcf_files > 1) { print "Error: too many matching samples filenames\n"; exit; - } + } # print "here we go: @vcf_files\n"; - } + } } if (@backup_arr > 0) { @@ -1559,13 +1559,13 @@ if (@backup_arr > 0) { print BA$_; } close BA; - + print "-----------------------------------------------\n"; print "renamed many files\n"; print "if you want to undo, please have a look at $vcf_templ.backup\n"; print "please check if you are happy with the results\n"; print "if yes, then restart\n"; - exit; + exit; } @@ -1585,20 +1585,20 @@ foreach my $chrint ($chr_start..$chr_end){ # copy here #################################### my $chr = $chrint; - + if ($chrint == 23) { $chr = "X"; } - + $hapout_name = $out_templ.".impute"; $hapout_name =~ s/XXX/$chr/g; my $vcf_name_loc_filtnorm ; my $vcf_name_loc_filtnorm_reform ; - + if ($vcf_templ ne "") { - + my $vcf_name = $vcf_templ; @@ -1626,7 +1626,7 @@ foreach my $chrint ($chr_start..$chr_end){ my $vcf_name_remote = $vcf_templ; - + $vcf_name_loc_filtnorm = $vcf_name_loc.".filtnorm.gz"; my $vcf_name_loc_filtnorm_fini = $vcf_name_loc_filtnorm.".fini"; @@ -1635,19 +1635,19 @@ foreach my $chrint ($chr_start..$chr_end){ - + ################################### # download vcf files #################################### - + unless (-e $vcf_name_loc) { unless ($answer eq "y" || $answer eq "Y"){ print "********************************\n"; print "vcf-file ($vcf_name_loc) not found, should I try to download this and all other chromosomes(y/n)?\n"; } - + my $sys = "$wget_script $vcf_site $vcf_name "; # print "$sys\n"; @@ -1693,7 +1693,7 @@ foreach my $chrint ($chr_start..$chr_end){ my $gm_name_loc = "loc.".$gm_name; my $gm_name_fini = $gm_name.".fini"; - # print "$gm_name_loc now2\n"; + # print "$gm_name_loc now2\n"; unless (-e $gm_name_fini) { if (-e $gm_name_loc) { @@ -1702,7 +1702,7 @@ foreach my $chrint ($chr_start..$chr_end){ # $gm_name = $gm_template; # $gm_name =~ s/XXX/23/g; # } - + &mysystem ("mv $gm_name_loc $gm_name"); &mysystem ("touch $gm_name_fini"); @@ -1722,21 +1722,21 @@ foreach my $chrint ($chr_start..$chr_end){ print "debug: $sys\n"; sleep(1); } - - # exit; + + # exit; if ($answer_gm eq "") { $answer_gm = lc <>; chomp $answer_gm; } - - + + if ($answer_gm eq "y" || $answer_gm eq "Y"){ - + print "will put this into parallel jobs:\t$sys\n"; push @job_vcf_dl, $sys; - - + + } else { $exit = 1; @@ -1769,7 +1769,7 @@ foreach my $chrint ($chr_start..$chr_end){ } - + ################################### # filter indels # if indels on same position only keep the ones with highest AC @@ -1785,22 +1785,22 @@ foreach my $chrint ($chr_start..$chr_end){ # exit; } - - + + ################################### # create impute-format with bcftools #################################### - + my $hapout_name_fini = $hapout_name.".fini"; - + unless (-e $hapout_name_fini) { # print "hapout: $hapout_name\n"; push @job_btools2, "$prepvcf_script_imp2 --vcf $vcf_name_loc_filtnorm_reform --out $hapout_name"; # push @job_vtools, "$vcf_script --mac $mac_th --gzvcf $vcf_name_loc --IMPUTE --out $hap_name"; } - + # else {# # ### control log-file # die $!."($hap_name.log)" unless open LOG, "< $hap_name.log"; @@ -1819,7 +1819,7 @@ foreach my $chrint ($chr_start..$chr_end){ # } - + } @@ -1836,18 +1836,18 @@ foreach my $chrint ($chr_start..$chr_end){ # print "phased: $out_templ$imp_phased_postfix\n"; - + unless (-e $mach_name) { # print "mach_name: $mach_name\n"; die unless ("$mach_name.gz"); push @job_cpgz, "zcat $mach_name.gz > $mach_name.tmp; mv $mach_name.tmp $mach_name"; } - + ################################### # create impute-format with mach2impute #################################### - + $hapout_name = "$mach_name.hap"; my $map_name = $mach_templ.".map"; $map_name =~ s/XXX/$chr/g; @@ -1855,7 +1855,7 @@ foreach my $chrint ($chr_start..$chr_end){ $anno_name =~ s/XXX/$chr/g; - + unless (-e $hapout_name) { # print "hapout: $hapout_name\n"; push @job_vtools, "$mach2impute_script --hap $mach_name --anno $anno_name --map $map_name" @@ -1892,16 +1892,16 @@ foreach my $chrint ($chr_start..$chr_end){ } } - - + + if (0) { ################################### # create beagle - format (not done any more) #################################### - + my $phased_name = $out_templ.$imp_phased_postfix; $phased_name =~ s/XXX/$chr/g; @@ -1924,8 +1924,8 @@ foreach my $chrint ($chr_start..$chr_end){ } } - - + + my $infopos_name = $out_templ.$imp_phased_postfix.".bgl.info_pos"; $infopos_name =~ s/XXX/$chr/g; my $bgl_name_count = $out_templ.$imp_phased_postfix.".bgl.count"; @@ -1971,7 +1971,7 @@ foreach my $chrint ($chr_start..$chr_end){ my $changes_file = "$hapout_name.legend.gz.rf.gz.changes.gz"; my $filtnorm_trans_fini = $changes_file; $filtnorm_trans_fini =~ s/legend.gz.rf.gz.changes.gz/vcf.gz.fini/; - + unless (-e $filtnorm_trans_fini) { my $sys = "$filtnorm_translate_script --filtnorm $vcf_name_loc_filtnorm_reform --changes $changes_file"; @@ -2016,7 +2016,7 @@ foreach my $chrint ($chr_start..$chr_end){ ############################################################### - + my $vcf_name_loc = $filtnorm_trans_file; $vcf_name_loc =~ s/\.vcf\.gz/.vcf.bgz/;; @@ -2055,7 +2055,7 @@ foreach my $chrint ($chr_start..$chr_end){ push @chunks_collection_T3, "$hapout_name.legend.gz.chunks_T3"; - + ######################################################################## ## times 1 my $chunksnps_1 = $chunksnps * 1; @@ -2067,7 +2067,7 @@ foreach my $chrint ($chr_start..$chr_end){ push @chunks_collection_1, "$hapout_name.legend.gz.chunks_1"; - + ######################################################################## ## times 2 my $chunksnps_2 = $chunksnps * 2; @@ -2103,7 +2103,7 @@ foreach my $chrint ($chr_start..$chr_end){ } push @chunks_collection_10, "$hapout_name.legend.gz.chunks_10"; - + ######################################################################## ## times 20 my $chunksnps_20 = $chunksnps * 20; @@ -2119,7 +2119,7 @@ foreach my $chrint ($chr_start..$chr_end){ if (1) { foreach my $pf_loc (@pop_fams) { - + my $freq_root = "$plink_name.freq.$pf_loc"; my $keep_file = "$sample_fam.$pf_loc"; @@ -2127,19 +2127,19 @@ foreach my $chrint ($chr_start..$chr_end){ # print "$pf_loc -> $bfile_pf\n"; - + unless (-e "$bfile_pf.fini"){ # push @job_pobed, "$plink_script --bfile $plink_name --out $bfile_pf --keep $keep_file --make-bed --freq"; push @job_pobed, "$plinkanc_script --bfile $plink_name --out $bfile_pf --keep $keep_file"; } - + # unless (-e "$freq_root.frq"){ # my $sys_f = "$plink_script --bfile $bfile_pf --out $freq_root --freq"; # push @job_frq, "$sys_f"; - + # if (0) { # unless (-e "$keep_file.sex") { # print "create $keep_file.sex\n"; @@ -2155,7 +2155,7 @@ foreach my $chrint ($chr_start..$chr_end){ # close KI; # close KO; # } - + # my $sys_f = "$plink_script --bfile $bgl_name --out $freq_root --keep $keep_file --freq --update-sex $keep_file.sex"; # push @job_frq, "$sys_f"; # } @@ -2188,9 +2188,9 @@ if (@job_vcf_dl > 0 ) { $sjadir = $rootdir; $sjaname = "vcf_download"; $sjatime = 2 + $walltimeplus; - $sjamem = 2000; + $sjamem = 2000; @sjaarray = @job_vcf_dl; - + &send_jobarray; @@ -2208,7 +2208,7 @@ if (@job_vcf_dl > 0 ) { # &send_jobarray; -# } +# } if (@job_cpgz > 0 ) { @@ -2219,7 +2219,7 @@ if (@job_cpgz > 0 ) { $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_cpgz; - + &send_jobarray; @@ -2236,7 +2236,7 @@ if (@job_vtools > 0 ) { $sjatime = 2 + $walltimeplus; $sjamem = 4000; @sjaarray = @job_vtools; - + &send_jobarray; @@ -2251,7 +2251,7 @@ if (@job_btools1 > 0 ) { $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_btools1; - + &send_jobarray; @@ -2267,7 +2267,7 @@ if (@job_filtnormfil > 0 ) { $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_filtnormfil; - + &send_jobarray; @@ -2286,11 +2286,11 @@ if (@job_btools2 > 0 ) { $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_btools2; - + &send_jobarray; -} +} # exit; @@ -2302,7 +2302,7 @@ if (0) { $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_phased; - + &send_jobarray; } if (@job_bgl > 0 ) { @@ -2311,7 +2311,7 @@ if (0) { $sjatime = 2 + $walltimeplus; $sjamem = 4000; @sjaarray = @job_bgl; - + &send_jobarray; } } @@ -2323,7 +2323,7 @@ if (@job_plink > 0 ) { $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_plink; - + &send_jobarray; } @@ -2336,7 +2336,7 @@ if (@job_refformat > 0 ) { $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_refformat; - + &send_jobarray; } @@ -2345,55 +2345,55 @@ if (@job_refformat > 0 ) { if (@job_filtnormtra > 0 ) { - - + + $sjadir = $rootdir; $sjaname = "trans_filtnorm"; $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_filtnormtra; - + &send_jobarray; - - + + } if (@job_prepmm_bgz > 0 ) { - - + + $sjadir = $rootdir; $sjaname = "prep_minimac_bgz"; $sjatime = 2 + $walltimeplus; $sjamem = 2000; ## each core, so mem request for the node is multi that @sjaarray = @job_prepmm_bgz; - + &send_jobarray; } if (@job_prepmm_tabix > 0 ) { - - + + $sjadir = $rootdir; $sjaname = "prep_minimac_tabix"; $sjatime = 2 ; $sjamem = 2000; ## each core, so mem request for the node is multi that @sjaarray = @job_prepmm_tabix; - + &send_jobarray; } if (@job_prepmm_m3vcf > 0 ) { - - + + $sjadir = $rootdir; $sjaname = "prep_minimac_m3vcf"; $sjatime = 2 + $walltimeplus * 10; # $sjamulti = $ncpus_multi; $sjamem = 24000; ## each core, so mem request for the node is multi that @sjaarray = @job_prepmm_m3vcf; - + &send_jobarray; } @@ -2401,18 +2401,18 @@ if (@job_prepmm_m3vcf > 0 ) { #exit; if (@job_vcf2bcf > 0 ) { - - + + # $sjaweek = 1; $sjadir = $rootdir; $sjaname = "vcf2bcf"; $sjatime = 2 + $walltimeplus; $sjamem = 2000; ## each core, so mem request for the node is multi that @sjaarray = @job_vcf2bcf; - + &send_jobarray; - - + + } @@ -2423,21 +2423,21 @@ if (@job_chunk > 0 ) { $sjatime = 2; $sjamem = 2000; @sjaarray = @job_chunk; - + &send_jobarray; } if (@job_pobed > 0 ) { - + $sjadir = $rootdir; $sjaname = "pobed"; $sjatime = 2 + $walltimeplus; $sjamem = 4000; @sjaarray = @job_pobed; - - &send_jobarray; + + &send_jobarray; } @@ -2469,7 +2469,7 @@ unless (-e "infosum_pos.nsnps") { &mysystem ("rm infosum_pos.nsnps.tmp"); } &mysystem ("touch infosum_pos.nsnps.tmp"); - + foreach my $p (@plink_collection) { &mysystem ("wc -l $p.bim >> infosum_pos.nsnps.tmp"); # print "$p\n"; @@ -2568,47 +2568,47 @@ unless (-e "infosum_pos.chunks_20") { - + # print "debug\n"; # exit; if (0){ - + foreach my $pf_loc (@pop_fams) { - + unless (-e "sumfrq.$pf_loc") { - - + + my $sys_f = "$floc2sumfrq_script --chrstart $chr_start --chrend $chr_end --out sumfrq.$pf_loc --bim $out_templ"."$imp_phased_postfix".".bgl.bim --frq $out_templ.$pf_loc.frq"; # print "$sys_f\n"; push @job_sumfrq, "$sys_f"; - + } - + unless (-e "sumfrq.$pf_loc.done") { - - + + my $sys_f = "$floc2sumfrq_script2 --chrstart $chr_start --chrend $chr_end --out sumfrq.$pf_loc --bim $out_templ"."$imp_phased_postfix".".bgl.bim --frq $out_templ.$pf_loc.frq"; # print "$sys_f\n"; push @job_sumfrq, "$sys_f"; - + } - + } - - + + if (@job_sumfrq > 0 ) { - + $sjadir = $rootdir; $sjaname = "sumfreq"; $sjatime = 2 + $walltimeplus; $sjamem = 2000; @sjaarray = @job_sumfrq; - + &send_jobarray; - + } } @@ -2617,12 +2617,12 @@ unless (-e "infosum_pos.chunks_20") { - + #print "$rootdir\n"; #exit; - + # &mysystem ("touch $rootdir/rootdir_done"); - + #} @@ -2634,15 +2634,15 @@ unless (-e "infosum_pos.chunks_20") { foreach my $pf_loc (@pop_fams) { - + unless (-e "pop_$pf_loc") {&mysystem ("mkdir pop_$pf_loc")};# print "----------------------------------------------\n" if ($debug); print "----------------------------------------------\n" if ($debug); print "work on ancestry subdir pop_$pf_loc\n" if ($debug); - print "----------------------------------------------\n" if ($debug); + print "----------------------------------------------\n" if ($debug); - print "create reference_templ to ancestry subdir\n" if ($debug); + print "create reference_templ to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/reference_templ") { print "create file reference_templ\n" if ($debug); die $! unless open RT, "> pop_$pf_loc/reference_templ"; @@ -2653,61 +2653,61 @@ foreach my $pf_loc (@pop_fams) { close RT; } - print "copy chunkinfo to ancestry subdir\n" if ($debug); + print "copy chunkinfo to ancestry subdir\n" if ($debug); - print "copy chunkinfo_T3 to ancestry subdir\n" if ($debug); + print "copy chunkinfo_T3 to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/infosum_pos.chunks_T3") { &mysystem ("cp infosum_pos.chunks_T3 pop_$pf_loc/"); } - - print "copy chunkinfo_1 to ancestry subdir\n" if ($debug); + + print "copy chunkinfo_1 to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/infosum_pos.chunks_1") { &mysystem ("cp infosum_pos.chunks_1 pop_$pf_loc/"); } - - print "copy chunkinfo_2 to ancestry subdir\n" if ($debug); + + print "copy chunkinfo_2 to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/infosum_pos.chunks_2") { &mysystem ("cp infosum_pos.chunks_2 pop_$pf_loc/"); } - print "copy chunkinfo_5 to ancestry subdir\n" if ($debug); + print "copy chunkinfo_5 to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/infosum_pos.chunks_5") { &mysystem ("cp infosum_pos.chunks_5 pop_$pf_loc/"); } - print "copy chunkinfo_10 to ancestry subdir\n" if ($debug); + print "copy chunkinfo_10 to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/infosum_pos.chunks_10") { &mysystem ("cp infosum_pos.chunks_10 pop_$pf_loc/"); } - print "copy chunkinfo_20 to ancestry subdir\n" if ($debug); + print "copy chunkinfo_20 to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/infosum_pos.chunks_20") { &mysystem ("cp infosum_pos.chunks_20 pop_$pf_loc/"); } - - - print "copy gwascatalog to ancestry subdir\n" if ($debug); + + + print "copy gwascatalog to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/gwascatalog.$date.rp.txt") { &mysystem ("cp gwascatalog.$date.rp.txt pop_$pf_loc/"); } - print "copy refGene to ancestry subdir\n" if ($debug); + print "copy refGene to ancestry subdir\n" if ($debug); unless (-e "pop_$pf_loc/$refgene_file") { &mysystem ("cp $refgene_file pop_$pf_loc/"); } - - - + + + foreach my $chrint ($chr_start..$chr_end){ my $chr = $chrint; - + if ($chrint == 23) { $chr = "X"; } - + $hapout_name = $out_templ.".impute"; $hapout_name =~ s/XXX/$chr/g; - my $plink_name = $hapout_name.".plink"; + my $plink_name = $hapout_name.".plink"; my $bfile_pf = "$plink_name.$pf_loc"; - + unless (-e "pop_$pf_loc/$bfile_pf.fam") { print "moving $bfile_pf.bed/bim/fam\n" if ($debug); system ("mv $bfile_pf.bed pop_$pf_loc/"); @@ -2726,7 +2726,7 @@ foreach my $pf_loc (@pop_fams) { $gm_name_source =~ s/XXX/X_nonPAR/g; my $gm_name_target = $gm_template; $gm_name_target =~ s/XXX/X/g; - + unless (-e $gm_name_target){ &mysystem ("cp $gm_name_source $gm_name_target") } @@ -2735,38 +2735,38 @@ foreach my $pf_loc (@pop_fams) { $gmchr_name_source =~ s/XXX/X_nonPAR/g; my $gmchr_name_target = $gmchr_template; $gmchr_name_target =~ s/XXX/X/g; - + unless (-e $gmchr_name_target){ &mysystem ("cp $gmchr_name_source $gmchr_name_target") } - + my $gmchr_name_source = $gmchr_template_23; $gmchr_name_source =~ s/XXX/X_nonPAR/g; my $gmchr_name_target = $gmchr_template_23; $gmchr_name_target =~ s/XXX/X/g; - + unless (-e $gmchr_name_target){ &mysystem ("cp $gmchr_name_source $gmchr_name_target") } - + } - + $gm_name = $gm_template; $gm_name =~ s/XXX/$chr/g; - + unless (-e "pop_$pf_loc/$gm_name") { system ("cp $rootdir/$gm_name pop_$pf_loc/$gm_name"); } # print "$rootdir/$gm_name\n"; - + } } #} @@ -2774,7 +2774,7 @@ foreach my $pf_loc (@pop_fams) { - + ############################################################# ## SUCCESSSS @@ -2786,7 +2786,7 @@ push @sjaarray, "tmp"; $sjatime = 2; $sjamem = 1000; - + &send_jobarray; @@ -2848,26 +2848,26 @@ if ($q16) { my $suminfo_n = "$suminfo.nsnps"; my $suminfo_r = "$suminfo.reffiles"; # my $suminfo_s = "$suminfo.sorted"; - + print "summary files q16\n"; # unless (-e "$suminfo_s"){ # &mysystem ("cat *.info_pos | grep -v SNP > $suminfo"); # &mysystem ("sort -k1,1 -u $suminfo > $suminfo_s.tmp"); # &mysystem ("mv $suminfo_s.tmp $suminfo_s"); # } - + unless (-e "$suminfo_n"){ &mysystem ("wc -l *.info_pos > $suminfo_n.tmp"); &mysystem ("mv $suminfo_n.tmp $suminfo_n"); } - + unless (-e "$suminfo_r"){ &mysystem ("ls $q16_phased > $suminfo_r.tmp"); &mysystem ("mv $suminfo_r.tmp $suminfo_r"); - + } - my $gema_file = "genetic_map_chr6_combined_b37.txt"; + my $gema_file = "genetic_map_chr6_combined_b37.txt"; unless (-e $gema_file) { &mysystem ("ln -s /fg/debakker_scratch/ripke/hapmap_ref/impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/$gema_file ."); @@ -2878,7 +2878,7 @@ if ($q16) { # unless (-e "sumfrq.eur.sorted"){ # &mysystem ("cp $suminfo_s sumfrq.eur.sorted.tmp"); # &mysystem ("mv sumfrq.eur.sorted.tmp sumfrq.eur.sorted"); - + # } @@ -2940,26 +2940,26 @@ if ($mhc) { my $suminfo_n = "$suminfo.nsnps"; my $suminfo_r = "$suminfo.reffiles"; # my $suminfo_s = "$suminfo.sorted"; - + print "summary files mhc\n"; # unless (-e "$suminfo_s"){ # &mysystem ("cat *.info_pos | grep -v SNP > $suminfo"); # &mysystem ("sort -k1,1 -u $suminfo > $suminfo_s.tmp"); # &mysystem ("mv $suminfo_s.tmp $suminfo_s"); # } - + unless (-e "$suminfo_n"){ &mysystem ("wc -l *.info_pos > $suminfo_n.tmp"); &mysystem ("mv $suminfo_n.tmp $suminfo_n"); } - + unless (-e "$suminfo_r"){ &mysystem ("ls $mhc_phased > $suminfo_r.tmp"); &mysystem ("mv $suminfo_r.tmp $suminfo_r"); - + } - my $gema_file = "genetic_map_chr6_combined_b37.txt"; + my $gema_file = "genetic_map_chr6_combined_b37.txt"; unless (-e $gema_file) { &mysystem ("ln -s /fg/debakker_scratch/ripke/hapmap_ref/impute2_ref/1KG_Aug12/ALL_1000G_phase1integrated_v3_impute_macGT1/$gema_file ."); @@ -2969,7 +2969,7 @@ if ($mhc) { # unless (-e "sumfrq.eur.sorted"){ # &mysystem ("cp $suminfo_s sumfrq.eur.sorted.tmp"); # &mysystem ("mv sumfrq.eur.sorted.tmp sumfrq.eur.sorted"); - + # } @@ -3002,7 +3002,7 @@ foreach my $pf_loc(@pop_fams) { } - + #### first link foreach my $chr ($chr_start..$chr_end){ my $phased_name = $out_templ.$imp_phased_postfix; @@ -3021,7 +3021,7 @@ foreach my $chr ($chr_start..$chr_end){ &mysystem ("ln -s $rootdir/$bgl_name.fam ."); } - my $gema_file = "genetic_map_chr$chr"."_combined_b37.txt"; + my $gema_file = "genetic_map_chr$chr"."_combined_b37.txt"; unless (-e "$rootdir/$gema_file") { $gema_file = "genetic_map_chr$chr"."_combined_b36.txt"; unless (-e "$rootdir/$gema_file") { @@ -3040,7 +3040,7 @@ my @job_r2s = (); my @job_b2i = (); my @job_b2p = (); my @job_preps = (); -#### +#### foreach my $chr ($chr_start..$chr_end){ #### ref2subchr @@ -3059,7 +3059,7 @@ foreach my $chr ($chr_start..$chr_end){ $fam_name =~ s/XXX/$chr/g; # print "fam_name: $fam_name\n"; # exit; - + unless (-e "$phased_name.done"){ # if (@sc_files == 0){ push @job_r2s, "$ref2subchr2_script --scsize $scsize $phased_name"; @@ -3112,13 +3112,13 @@ if (@job_r2s > 0 ) { $sjatime = 2; $sjamem = 4000; @sjaarray = @job_r2s; - + &send_jobarray; } - + if (@job_preps > 0 ) { @@ -3128,13 +3128,13 @@ if (@job_preps > 0 ) { $sjatime = 1; $sjamem = 2000; @sjaarray = @job_preps; - + &send_jobarray; } - + if (@job_b2i > 0 ) { $sjadir = $subchr_dir; @@ -3142,7 +3142,7 @@ if (@job_b2i > 0 ) { $sjatime = 1; $sjamem = 2000; @sjaarray = @job_b2i; - + &send_jobarray; @@ -3157,7 +3157,7 @@ if (@job_b2p > 0 ) { $sjatime = 1; $sjamem = 2000; @sjaarray = @job_b2p; - + &send_jobarray; @@ -3167,7 +3167,7 @@ if (@job_b2p > 0 ) { - + my $suminfo = "infosum_pos"; my $suminfo_n = "$suminfo.nsnps"; my $suminfo_r = "$suminfo.reffiles"; @@ -3193,10 +3193,9 @@ push @sjaarray, "tmp"; $sjatime = 1; $sjamem = 1000; - -&send_jobarray; +&send_jobarray; -exit; +exit; \ No newline at end of file