forked from Ales-ibt/in_house_scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstats_MM.pl
83 lines (73 loc) · 1.95 KB
/
stats_MM.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/perl
use strict;
use warnings;
######## THIS PROGRAM CALCULATES THE BASIC STATS OF AN ASSEMBLY
######## Alejandra Escobar, EMBL-EBI/Sanger
######## May 25, 2019
#
# Usage: stats_MM.pl <assembly.fasta>
#
# Reads a FASTA file and prints a header line followed by one tab-separated
# row: file name, number of contigs, average / largest / shortest contig
# size, total bases, and the N50/L50/N50_bases and N90/L90/N90_bases values
# (the last six fields are printed by calcN).

# Shared with calcN(), which is compiled in this same file scope.
my @ordenado   = () ;   # contig lengths sorted longest first
my $totalBases = 0 ;    # sum of every contig length

# Working variables calcN() has historically used; they stay declared at
# file scope so the sub keeps compiling under `use strict`.
my $N ;
my $valorMedio ;
my $sumaElem ;
my $contadorN ;
my $posicionN ;
my $elementoN ;

scalar(@ARGV) == 1 || die "usage: $0 <assembly.fasta>\n";
my $file = $ARGV[0] ;

##################### READING CONTIG LENGTHS #############
my @longitudes = () ;   # one entry per contig, in file order
my $Totalong ;          # length of the contig being read; undef before the first header

# Open and parse before printing anything, so a failure leaves STDOUT empty.
open(my $fasta, '<', $file) or die "Cannot open '$file': $!\n" ;
while (my $linea = <$fasta>) {
    chomp $linea ;
    $linea =~ s/\r$// ;   # do not count the CR of CRLF files as a base
    if ($linea =~ /^>/) {
        # A new header closes the previous contig (if there was one).
        push(@longitudes, $Totalong) if defined $Totalong ;
        $Totalong = 0 ;
    }
    elsif (defined $Totalong) {
        # Text before the first header belongs to no contig and is ignored.
        $Totalong += length($linea) ;
    }
}
close($fasta) ;
push(@longitudes, $Totalong) if defined $Totalong ;   # close the last contig

my $contcontig = scalar(@longitudes) ;
$contcontig > 0
    or die "No FASTA records (lines starting with '>') found in '$file'\n" ;

##################### SORTING CONTIG LENGTHS #############
@ordenado = sort { $b <=> $a } @longitudes ;
$totalBases += $_ for @ordenado ;
my $tamanoProm = int($totalBases / $contcontig) ;

print "assembly\tTotal_contig\tAverage_contig_size\tLargest_contig_size\tShortest_contig_size\tGenome_size\tN50\tL50\tN50_bases\tN90\tL90\tN90_bases\n";
# The list now holds exactly one entry per contig, so the shortest is the last.
print join("\t", $file, $contcontig, $tamanoProm, $ordenado[0], $ordenado[-1], $totalBases), "\t" ;

##################### SPECIFYING VALUES OF N ##############################################
foreach my $elementoS (50, 90) {
    # Zero the shared accumulators before every call to calcN.
    $valorMedio = 0 ;
    $sumaElem   = 0 ;
    $contadorN  = 0 ;
    calcN($elementoS) ;
}
print "\n";
exit;
#################### COMPUTING N VALUES ####################
# calcN($percent) -- print the Nx statistics of the assembly.
#
# Reads the file-scope @ordenado (contig lengths, longest first) and
# $totalBases (their sum). Walks the contigs from the longest down until the
# running total reaches $percent % of $totalBases, then prints three
# tab-terminated fields to the currently selected output handle:
#   Nx        length of the contig that reached the threshold
#   Lx        number of contigs needed to reach it
#   Nx_bases  bases accumulated in those contigs
# Returns whatever print returns (true on success).
sub calcN {
    my ($percent) = @_ ;
    my $threshold = ($percent / 100) * $totalBases ;

    # Accumulators are local, so the result does not depend on the caller
    # having zeroed any shared counters beforehand.
    my $bases_so_far = 0 ;
    my $contigs_used = 0 ;
    my $nx_length    = 0 ;   # stays 0 when no contig is needed (empty input)

    foreach my $length (@ordenado) {
        last if $bases_so_far >= $threshold ;   # threshold reached: stop scanning
        next unless defined $length ;           # tolerate placeholder entries
        $bases_so_far += $length ;
        $contigs_used++ ;
        $nx_length = $length ;
    }
    return print "$nx_length\t$contigs_used\t$bases_so_far\t" ;
}