-
Notifications
You must be signed in to change notification settings - Fork 0
/
arisa_raw_for_trex.pl
executable file
·84 lines (68 loc) · 1.92 KB
/
arisa_raw_for_trex.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/perl
use warnings;
use strict;
use Getopt::Long;
use Text::CSV;
=pod
Written by: Will Overholt
4-16-2013
Script takes the raw .csv file from the bioanalyzer as input
It converts the bioanalyzer output format into GeneMapper's input format,
which is required by T-REX (http://trex.biohpc.org/index.aspx).
Input (first argument) = bioanalyzer.csv file
output_dir (second argument) = directory you want to save the output files in
Script outputs:
[1] arisa_out.txt
- this is the converted fragment file
- columns are: (1) dye,peak# [just used blue as default] (2) sample name;
(3) peak size; (4) peak height; (5) peak area; (6) default 1 [required by T-REX, not used]
- to modify the output columns, change the fields assembled for each peak row
in the main loop (the statement that builds the "B,<peak#>" output line)
[2] arisa_label.txt
- T-REX requires a labeling file
- this file only contains the sample name by default
- user will have to add meta data (see T-REX documentation)
=cut
# Main conversion.
#
# Reads the Bioanalyzer CSV export named by the first command-line argument
# and writes two files into the directory named by the second argument:
#   arisa_out.txt   - one tab-separated row per peak
#   arisa_label.txt - a "FileName" header followed by one sample name per line
#
# Dies with a message if an argument is missing or a file cannot be opened
# or closed. Lines that fail to parse are reported on STDERR and skipped.

#input files
my ($file, $out_dir) = @ARGV;
die "Usage: $0 <bioanalyzer.csv> <output_dir>\n"
    unless defined $file && defined $out_dir;

# binary => 1 lets the parser accept bytes outside printable ASCII. Without
# it, any line holding such a byte fails to parse; a sample-name line lost
# that way would leave its peaks attributed to the previous sample.
my $csv = Text::CSV->new({ binary => 1 })
    or die "Cannot create CSV parser: " . Text::CSV->error_diag() . "\n";

# Three-argument open with lexical handles: the file name can never be
# misread as part of the open mode.
open my $in_fh, '<', $file
    or die "Cannot open input file '$file': $!\n";

#files the script creates as output
my $results_path = "$out_dir/arisa_out.txt";
my $label_path   = "$out_dir/arisa_label.txt";
open my $results_fh, '>', $results_path
    or die "Cannot open '$results_path' for writing: $!\n";
open my $label_fh, '>', $label_path
    or die "Cannot open '$label_path' for writing: $!\n";
print {$label_fh} "FileName\n";

my $name    = '';   # current sample name; empty until the first "Sample Name" row
my $peak_no = 0;    # peak counter, restarted for every sample

LINE:
while (my $line = <$in_fh>) {    #read each line from the CSV file
    # Header rows beginning with "Name" or "Size" carry no peak data.
    next LINE if $line =~ m/^Name/ || $line =~ m/^Size/;

    if (!$csv->parse($line)) {
        my $bad = $csv->error_input;
        $bad = $line unless defined $bad;
        $bad =~ s/[\r\n]+\z//;
        warn "Failed to parse line $.: $bad\n";
        next LINE;
    }

    my @columns = $csv->fields();
    next LINE unless @columns && defined $columns[0];

    if ($columns[0] =~ m/Sample Name/) {
        # A new sample starts: remember its name, list it in the label
        # file and restart the peak numbering.
        $name = defined $columns[1] ? $columns[1] : '';
        print {$label_fh} "$name\n";
        $peak_no = 0;
    }
    elsif ($columns[0] =~ m/[0-9]+/) {
        # Any row whose first column contains a digit is treated as a peak.
        $peak_no++;

        # Drop thousands separators such as "1,500".
        (my $size = $columns[0]) =~ s/,//g;

        # Columns 6 and 9 fill the peak-height and peak-area output fields.
        # Short rows yield empty fields instead of "uninitialized" warnings.
        my $height = defined $columns[6] ? $columns[6] : '';
        my $area   = defined $columns[9] ? $columns[9] : '';

        # To change the output columns, edit the list joined here. The
        # trailing 1 is a constant last column.
        print {$results_fh}
            join("\t", "B,$peak_no", $name, $size, $height, $area, 1), "\n";
    }
}

close $in_fh
    or warn "Error closing '$file': $!\n";
# Buffered write errors only surface at close, so check both output handles.
close $results_fh
    or die "Error writing '$results_path': $!\n";
close $label_fh
    or die "Error writing '$label_path': $!\n";