Skip to content

Commit 4bd87bc

Browse files
committed
Merge pull request #27 from ICGC-TCGA-PanCancer/dev
Dev - merge 1.0.0 into master ready for release
2 parents b971310 + 217d3ce commit 4bd87bc

26 files changed

+351
-65
lines changed

.gitignore

-2
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@
22
/blib
33
/docs
44
/pm_to_blib
5-
/MYMETA.yml
6-
/MYMETA.json
75
/MANIFEST.bak
86
/install_tmp
97
/setup.log

Changes

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
1.0.0
2+
bam_stats.pl actually installed now.
3+
Basic *.bas perl access module.
4+
Upgraded libmaus/biobambam to resolve patch and CentOS install issue.
5+
Reference implementations ensure unique RG:ID between files.
16
0.3.0
27
Changes for the re-worked PanCancer submission SOP.
38
Patch for libmaus issue as not going to be a release in time.

INSTALL

+19-8
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,25 @@ OS:
1414
Other Software
1515
For installation to proceed you require the following packages:
1616

17-
zlib1g-dev
18-
g++
19-
dh-autoreconf
20-
libncurses-dev
21-
pkg-config
22-
libgd2-xpm-dev
23-
24-
This listing is based on Ubuntu 12.04
17+
For Ubuntu (tested with 12.04)
18+
apt-get install
19+
zlib1g-dev
20+
g++
21+
dh-autoreconf
22+
libncurses-dev
23+
pkg-config
24+
libgd2-xpm-dev
25+
26+
For CentOS (tested with 6.4)
27+
yum install
28+
zlib-devel
29+
gcc-c++
30+
autoconf
31+
automake
32+
libtool
33+
boost-devel.x86_64
34+
ncurses-devel.x86_64
35+
gd-devel
2536

2637
setup.sh will install
2738
biobambam

MANIFEST

+4-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ docs.tar.gz
1010
INSTALL
1111
lib/PCAP.pm
1212
lib/PCAP/Bam.pm
13+
lib/PCAP/Bam/Bas.pm
1314
lib/PCAP/Bam/Stats.pm
1415
lib/PCAP/Bwa.pm
1516
lib/PCAP/Bwa/Meta.pm
@@ -20,7 +21,6 @@ LICENSE
2021
Makefile.PL
2122
MANIFEST This list of files
2223
MANIFEST.SKIP
23-
patches/fileModeFix.diff
2424
prerelease.sh
2525
README.md
2626
setup.sh
@@ -31,6 +31,7 @@ t/2_pl_compile.t
3131
t/3_external_progs.t
3232
t/pcap.t
3333
t/pcapBam.t
34+
t/pcapBamBas.t
3435
t/pcapBamStats.t
3536
t/pcapBwa.t
3637
t/pcapBwaMeta.t
@@ -44,6 +45,7 @@ testData/2_1.fq
4445
testData/3_2.fq
4546
testData/data.file
4647
testData/empty.bam
48+
testData/empty.bam.bas
4749
testData/empty.file
4850
testData/empty.fq
4951
testData/empty_r1_1.fq
@@ -64,4 +66,5 @@ testData/not_really_a.bam
6466
testData/paired.bam
6567
testData/Stats.bam
6668
testData/Stats.bam.bas
69+
testData/test.bam.bas
6770
testData/unpaired.bam

MYMETA.json

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
{
2+
"abstract" : "unknown",
3+
"author" : [
4+
"unknown"
5+
],
6+
"dynamic_config" : 0,
7+
"generated_by" : "ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.131560",
8+
"license" : [
9+
"unknown"
10+
],
11+
"meta-spec" : {
12+
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
13+
"version" : "2"
14+
},
15+
"name" : "PCAP",
16+
"no_index" : {
17+
"directory" : [
18+
"t",
19+
"inc"
20+
]
21+
},
22+
"prereqs" : {
23+
"build" : {
24+
"requires" : {
25+
"ExtUtils::MakeMaker" : "0"
26+
}
27+
},
28+
"configure" : {
29+
"requires" : {
30+
"ExtUtils::MakeMaker" : "0"
31+
}
32+
},
33+
"runtime" : {
34+
"requires" : {
35+
"Bio::DB::Sam" : "1.39",
36+
"Bio::Root::Version" : "1.006923",
37+
"Capture::Tiny" : "0.24",
38+
"Const::Fast" : "0.014",
39+
"Data::UUID" : "1.219",
40+
"Devel::Cover" : "1.09",
41+
"File::Which" : "0.05",
42+
"GD" : "2.52",
43+
"IPC::System::Simple" : "1.25",
44+
"List::Util" : "1.38",
45+
"Math::Gradient" : "0.04",
46+
"Module::Build" : "0.42",
47+
"Pod::Coverage" : "0.23",
48+
"Proc::ProcessTable" : "0.5",
49+
"Term::UI" : "0.42",
50+
"Test::Fatal" : "0.013",
51+
"Try::Tiny" : "0.19"
52+
}
53+
}
54+
},
55+
"release_status" : "stable",
56+
"version" : "v1.0.0"
57+
}

MYMETA.yml

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
---
2+
abstract: unknown
3+
author:
4+
- unknown
5+
build_requires:
6+
ExtUtils::MakeMaker: 0
7+
configure_requires:
8+
ExtUtils::MakeMaker: 0
9+
dynamic_config: 0
10+
generated_by: 'ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.131560'
11+
license: unknown
12+
meta-spec:
13+
url: http://module-build.sourceforge.net/META-spec-v1.4.html
14+
version: 1.4
15+
name: PCAP
16+
no_index:
17+
directory:
18+
- t
19+
- inc
20+
requires:
21+
Bio::DB::Sam: 1.39
22+
Bio::Root::Version: 1.006923
23+
Capture::Tiny: 0.24
24+
Const::Fast: 0.014
25+
Data::UUID: 1.219
26+
Devel::Cover: 1.09
27+
File::Which: 0.05
28+
GD: 2.52
29+
IPC::System::Simple: 1.25
30+
List::Util: 1.38
31+
Math::Gradient: 0.04
32+
Module::Build: 0.42
33+
Pod::Coverage: 0.23
34+
Proc::ProcessTable: 0.5
35+
Term::UI: 0.42
36+
Test::Fatal: 0.013
37+
Try::Tiny: 0.19
38+
version: v1.0.0

Makefile.PL

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ WriteMakefile(
3131
EXE_FILES => [qw( bin/bam_to_sra_sub.pl
3232
bin/bwa_aln.pl
3333
bin/bwa_mem.pl
34+
bin/bam_stats.pl
3435
bin/diff_bams.pl
3536
bin/monitor.pl)],
3637
PREREQ_PM => {

bin/bwa_aln.pl

-3
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,6 @@ BEGIN
4343
use PCAP::Bwa;
4444
use PCAP::Bwa::Meta;
4545

46-
my @mod_list = keys %INC;
47-
exit 0 if(first {$_ =~ m|^Devel/Cover| } @mod_list);
48-
4946
const my @VALID_PROCESS => qw(bam2fq aln sampe mark);
5047
const my %INDEX_FACTOR => ( 'bam2fq' => 1,
5148
'aln' => 2,

bin/bwa_mem.pl

-3
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,6 @@ BEGIN
4343
use PCAP::Bwa::Meta;
4444
use version;
4545

46-
my @mod_list = keys %INC;
47-
exit 0 if(first {$_ =~ m|^Devel/Cover| } @mod_list);
48-
4946
const my @VALID_PROCESS => qw(bwamem mark);
5047
const my %INDEX_FACTOR => ( 'bwamem' => 1,
5148
'mark' => 1,);

docs.tar.gz

5.64 KB
Binary file not shown.

lib/PCAP.pm

+8-6
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ package PCAP;
2323
use strict;
2424
use Const::Fast qw(const);
2525

26-
our $VERSION = '0.3.0';
26+
our $VERSION = '1.0.0';
2727

2828
const my $LICENSE =>
2929
"#################
@@ -33,11 +33,13 @@ const my $LICENSE =>
3333
#################";
3434

3535
const my $DEFAULT_PATH => 'biobambam,samtools,bwa';
36-
const my %UPGRADE_PATH => ( '0.1.0' => 'biobambam,samtools,bwa',
37-
'0.1.1' => 'biobambam,bwa',
38-
'0.1.2' => 'biobambam',
39-
'0.2.0' => 'biobambam',
40-
'0.3.0' => '',
36+
const my %UPGRADE_PATH => ( '0.1.0' => 'biobambam,samtools,bwa',
37+
'0.1.1' => 'biobambam,bwa',
38+
'0.1.2' => 'biobambam',
39+
'0.2.0' => 'biobambam',
40+
'0.2.99' => 'biobambam',
41+
'0.3.0' => 'biobambam',
42+
'1.0.0' => '',
4143
);
4244

4345
sub license {

lib/PCAP/Bam.pm

+8-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ use File::Which qw(which);
3333
use Bio::DB::Sam;
3434
use Carp qw(croak);
3535
use List::Util qw(first);
36+
use Data::UUID;
3637

3738
use PCAP::Threaded;
3839

@@ -51,14 +52,18 @@ sub new {
5152
}
5253

5354
sub rg_line_for_output {
54-
my $bam = shift;
55+
my ($bam, $uniq_id) = @_;
5556
my $sam = sam_ob($bam);
5657
my $header = $sam->header->text;
5758
my $rg_line;
5859
while($header =~ m/^(\@RG\t[^\n]+)/xmsg) {
5960
my $new_rg = $1;
6061
die "BAM file appears to contain data for multiple readgroups, not supported: \n\n$header\n" if(defined $rg_line);
6162
$rg_line = $new_rg;
63+
if($uniq_id) {
64+
my $uuid = lc Data::UUID->new->create_str;
65+
$rg_line =~ s/\tID:[^\t]+/\tID:$uuid/;
66+
}
6267
$rg_line =~ s/\t/\\t/g;
6368
}
6469
return ($rg_line, $sam); # also return the SAM object
@@ -359,6 +364,8 @@ The SAM object is also returned should it be useful for other calls
359364
Takes BAM or Bio::DB::Sam object as input and returns the string representation for the RG line.
360365
Intended for use when adding RG to BWA MEM output and is only useful in single RG BAMs
361366
367+
Optional second boolean arg causes ID to be replaced with a UUID.
368+
362369
The SAM object is also returned should it be useful for other calls
363370
364371
=item sam_ob

lib/PCAP/Bam/Bas.pm

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
package PCAP::Bam::Bas;
2+
3+
##########LICENCE##########
4+
# PCAP - NGS reference implementations and helper code for the ICGC/TCGA Pan-Cancer Analysis Project
5+
# Copyright (C) 2014 ICGC PanCancer Project
6+
#
7+
# This program is free software; you can redistribute it and/or
8+
# modify it under the terms of the GNU General Public License
9+
# as published by the Free Software Foundation; either version 2
10+
# of the License, or (at your option) any later version.
11+
#
12+
# This program is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with this program; if not see:
19+
# http://www.gnu.org/licenses/gpl-2.0.html
20+
##########LICENCE##########
21+
22+
use PCAP;
23+
our $VERSION = PCAP->VERSION;
24+
25+
use strict;
26+
use English qw( -no_match_vars );
27+
use warnings FATAL=>'all';
28+
use autodie qw( :all );
29+
use Carp qw(croak carp);
30+
31+
# Constructor: builds an accessor object for a single *.bas file.
#   $class - class name to bless into
#   $bas   - path to the *.bas file (validated and parsed by _init)
# Returns the blessed object; any exception raised by _init propagates.
sub new {
  my ($class, $bas) = @_;
  my $self = bless {}, $class;
  $self->_init($bas);
  return $self;
}
38+
39+
sub _init {
40+
my ($self, $bas) = @_;
41+
croak "No bas file defined" if(!defined $bas);
42+
die "*.bas file: $bas does not exist" unless(-e $bas);
43+
die "*.bas file: $bas is empty" unless(-s $bas);
44+
open my $IN, '<', $bas;
45+
$self->bas_keys($IN);
46+
$self->_import_data($IN);
47+
close $IN;
48+
return 1;
49+
}
50+
51+
# Reads the data rows of a *.bas file and stores one record per readgroup.
#   $fh - read handle positioned just after the header line
# Each row is split on tabs and mapped onto the column names recorded in
# $self->{'key_pos_map'} (populated by bas_keys); the resulting hash is
# stored in $self->{'_data'} keyed by the row's 'readgroup' value.
# Blank lines are ignored so that a trailing empty line does not abort the
# load.  Croaks when a non-blank row yields no readgroup value.
# Always returns 1 on success.
sub _import_data {
  my ($self, $fh) = @_;
  my $pos_for = $self->{'key_pos_map'};
  # The map holds each distinct column name once, with the position of its
  # last occurrence in the header - the same result as visiting every
  # header entry in turn.
  my @columns = keys %{$pos_for};
  while(my $line = <$fh>) {
    chomp $line;
    next if($line =~ m/\A\s*\z/); # nothing to parse on a blank line
    my @bits = split /\t/, $line;
    my %rg;
    # split drops trailing empty fields, so short rows leave those columns undef
    @rg{@columns} = @bits[ @{$pos_for}{@columns} ];
    croak "No readgroup value found at line $. of *.bas data\n" unless(defined $rg{'readgroup'});
    $self->{'_data'}->{$rg{'readgroup'}} = \%rg;
  }
  return 1;
}
64+
65+
# Accessor for the column names of the *.bas file.
#   $key_fh - optional read handle positioned at the header line
# With a handle: reads one line, splits it on tabs, and records both the
# ordered column names ($self->{'keys'}) and a name => position lookup
# ($self->{'key_pos_map'}).  Croaks if the keys were already loaded.
# Without a handle: simply returns what was recorded earlier.
# Returns an array reference of column names (undef if never loaded).
sub bas_keys {
  my ($self, $key_fh) = @_;
  my $loading = defined $key_fh;
  if($loading && exists $self->{'keys'}) {
    croak "bas_keys should only be initialised once\n";
  }
  return $self->{'keys'} unless($loading);

  my $header = <$key_fh>;
  chomp $header;
  my @columns = split /\t/, $header;
  my %position_of;
  # slice assignment: a repeated column name keeps its last position
  @position_of{@columns} = (0..$#columns);
  $self->{'keys'} = \@columns;
  $self->{'key_pos_map'} = \%position_of;
  return $self->{'keys'};
}
82+
83+
# Looks up a single value from the loaded *.bas data.
#   $rg  - readgroup identifier
#   $key - column name
# Dies if the readgroup is unknown.  Returns the stored value, or undef
# when the readgroup has no entry for that key.
sub get {
  my ($self, $rg, $key) = @_;
  if(!exists $self->{'_data'}->{$rg}) {
    die qq{Readgroup '$rg' does not exist\n};
  }
  my $record = $self->{'_data'}->{$rg};
  return $record->{$key} if(exists $record->{$key});
  # explicit undef (not a bare return) so list-context callers still get one element
  return undef;
}
88+
89+
1;
90+
91+
__END__
92+
93+
=head1 PCAP::Bam::Bas
94+
95+
Convenience class for accessing data in a *.bas file.
96+
97+
=head2 METHODS
98+
99+
=over 2
100+
101+
=item new
102+
103+
Construct an access object for BAM statistics file.
104+
105+
my $bas_ob = PCAP::Bam::Bas->new($bas);
106+
107+
=item bas_keys
108+
109+
Returns an array reference of the available keys (column headers) for this BAS file.
110+
111+
=item get
112+
113+
Retrieve a value by its readgroup and key:
114+
115+
$bas_ob->get($rg, 'median_insert_size');
116+
117+
NOTE: Returns undef if a key is not available; dies if the readgroup does not exist.
118+
119+
=back

0 commit comments

Comments
 (0)