Skip to content

Commit

Permalink
Initial implementation of block slicing for #79.
Browse files Browse the repository at this point in the history
  • Loading branch information
csw committed Jul 11, 2012
1 parent e77d682 commit e0aa80a
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 2 deletions.
19 changes: 19 additions & 0 deletions features/slice.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Feature: MAF slicing
In order to obtain just the alignment data covering a given region
I want to be able to take slices of alignment blocks over
A given interval

Scenario: Interval covering two blocks
Given a MAF source file "mm8_chr7_tiny.maf"
And a Kyoto Cabinet index file "mm8_chr7_tiny.kct"
When I open it with a MAF reader
And I enable the :remove_gaps parser option
And open a new MAF writer
And write the header from the original MAF file
And filter for only the species
| mm8 |
| rn4 |
And search for blocks between positions 80082350 and 80082380 of mm8.chr7
And slice the resulting blocks according to the given interval
And write all the matched blocks
Then the output should match, except whitespace, "mm8_chr7_tiny_slice1.maf"
4 changes: 2 additions & 2 deletions features/step_definitions/index_steps.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
end

When /^search for blocks between positions (\d+) and (\d+) of (\S+)$/ do |i_start, i_end, chr|
int = Bio::GenomicInterval.zero_based(chr, i_start.to_i, i_end.to_i)
@blocks = @idx.find([int], @parser, @block_filter).to_a
@interval = Bio::GenomicInterval.zero_based(chr, i_start.to_i, i_end.to_i)
@blocks = @idx.find([@interval], @parser, @block_filter).to_a
end

Then /^(\d+) blocks? (?:is|are) obtained$/ do |num|
Expand Down
4 changes: 4 additions & 0 deletions features/step_definitions/output_steps.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
@writer.write_blocks(@parser.parse_blocks)
end

When /^write all the matched blocks$/ do
@writer.write_blocks(@blocks)
end

RSpec::Matchers.define :match_except_ws do |expected|
match do |actual|
system("diff --ignore-space-change --brief #{expected} #{actual} >/dev/null 2>&1")
Expand Down
4 changes: 4 additions & 0 deletions features/step_definitions/slice_steps.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
When /^slice the resulting blocks according to the given interval$/ do
# @blocks and @interval
@blocks = @blocks.collect { |b| b.slice(@interval) }
end
54 changes: 54 additions & 0 deletions lib/bio/maf/maf.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
module Bio
class GenomicInterval
def intersection(other)
raise ArgumentError unless self.chrom == other.chrom
GenomicInterval.new(self.chrom,
[self.chr_start, other.chr_start].max,
[self.chr_end, other.chr_end].min)
end
end

module MAF

# A MAF header, containing the variable-value pairs from the first
Expand Down Expand Up @@ -116,6 +125,31 @@ def remove_gaps!
gaps.size
end

def slice(interval)
case interval.compare(ref_seq.interval)
when :contains, :equal
return self
when :contained_by, :left_overlapped, :right_overlapped
_slice(interval.intersection(ref_seq.interval))
when :left_adjacent, :right_adjacent, :left_off, :right_off
raise "Cannot slice a block with a non-overlapping interval! Block #{ref_seq.interval}, interval #{interval}"
when :different_chrom
raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
else
raise "Unhandled comparison result: #{interval.compare(ref_seq.interval)}"
end
end

def _slice(interval)
offset = interval.zero_start - ref_seq.start
i_len = interval.length
s2 = sequences.collect { |s| s.slice(offset, i_len) }
v2 = vars.dup
v2[:score] = '0.0'
# TODO: should the filtered param be #modified? instead?
Block.new(v2, s2, offset, size, @filtered)
end

end

# A sequence within an alignment block.
Expand Down Expand Up @@ -153,6 +187,22 @@ def end
start + size
end

def interval
GenomicInterval.zero_based(self.source, self.start, self.end)
end

def slice(offset, len)
s2 = Sequence.new(source,
start + offset,
len,
strand,
src_size,
text.slice(offset, len))
s2.quality = quality.slice(offset, len) if quality
# TODO: what to do with synteny data?
s2
end

# Whether this sequence is empty. Only true for {EmptySequence}
# instances from 'e' lines.
def empty?
Expand Down Expand Up @@ -253,6 +303,10 @@ def text
''
end

def slice(offset, len)
self
end

def empty?
true
end
Expand Down
9 changes: 9 additions & 0 deletions test/data/mm8_chr7_tiny_slice1.maf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
##maf version=1
a score=0.0
s mm8.chr7 80082350 18 + 145134094 TGGAGGGCGGTCCCAGCA
s rn4.chr1 136011801 18 + 267910886 CGGAGGGCGGTCCCAGCA

a score=0.0
s mm8.chr7 80082368 12 + 145134094 TGAGAGGGCATG
s rn4.chr1 136011819 12 + 267910886 TGAGAGGGCATG

0 comments on commit e0aa80a

Please sign in to comment.