-
Notifications
You must be signed in to change notification settings - Fork 0
/
tab3col_to_bedgraph.pl
executable file
·99 lines (68 loc) · 2.36 KB
/
tab3col_to_bedgraph.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/perl
# Description: This script converts a 3 columns tabular format, where columns are chr, start, value, to bedGraph format. Input file may be compressed as .gz.
# Coordinates in both input and bedGraph output are assumed to be 0-based (http://genome.ucsc.edu/goldenPath/help/bedgraph.html).
# Usage: tab3col_to_bedgraph.pl --tab input.tsv --bedgraph output.bedgraph
# --tab : specify input file in 3 columns tabular format, where columns are chr, start, value.
# --bedgraph : specify output file in bedgraph format.
# Credits: This script was written by Sebastien Vigneau (sebastien.vigneau@gmail.com) in Alexander Gimelbrant lab (Dana-Farber Cancer Institute).
use strict;
use warnings;
use Getopt::Long;
my $usage = "Usage: $0 --tab <infile.tsv> --bedgraph <outfile.bedgraph>";
# Parse command line arguments
my $infile; # 3 columns input file name
my $outfile; # bedgraph output file name
GetOptions (
"tab=s" => \$infile,
"bedgraph=s" => \$outfile,
) or die ("Error in command line arguments!\n$usage\n");
# Open input file. If it is compressed with gunzip, uncompress it.
if ($infile =~ /\.gz$/){
open(IN,'-|',"gunzip -c $infile") || die "Could not open $infile: $!\n";
} else {
open(IN,'<',$infile) || die "Could not open $infile: $!\n";
}
# Open output file.
open(OUT,'>',$outfile) || die "Could not open $outfile: $!\n";
# Conversion to bedgraph starts here.
# Declare variables.
my $chr;
my $start;
my $end;
my $val;
my $step;
my $prev_chr;
my $prev_start;
my $prev_end;
my $prev_val;
while (<IN>) {
chomp;
# Skip comment lines
next if (/^#/);
# Save previous line information
$prev_chr = $chr;
$prev_start = $start;
$prev_val = $val;
# Parse relevant information in current line
# e.g: chr1 0 2
($chr, $start, $val) = split(/\t/);
# Continue to next line if first line of file
next if (! defined $prev_chr);
# Update step size if current line belongs to same chromosome as previous line.
# Otherwise, keep step unchanged.
if ($chr eq $prev_chr) {
$step = $start - $prev_start;
}
# Print information for previous line
$prev_end = $prev_start + $step;
print OUT "$prev_chr\t$prev_start\t$prev_end\t$prev_val\n";
}
# Print last line
$prev_chr = $chr;
$prev_start = $start;
$prev_end = $prev_start + $step;
$prev_val = $val;
print OUT "$prev_chr\t$prev_start\t$prev_end\t$prev_val\n";
close(IN);
close(OUT);
exit(0);