Skip to content

Commit

Permalink
Refactor bb reader (bigwig / bigbed) to account for files with unusua…
Browse files Browse the repository at this point in the history
…l layouts. Specifically chromTreeOffset > fullDataOffset, but also others. (#1890)
  • Loading branch information
jrobinso authored Sep 7, 2024
1 parent c952301 commit 46066e5
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 30 deletions.
103 changes: 77 additions & 26 deletions js/bigwig/bwReader.js
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,16 @@ class BWReader {
}
}

/**
* The BB header consists of
* (1) the common header
* (2) the zoom headers
* (3) autosql
* (4) total summary block (version 2 and later)
*
* In addition, we read the chromosome B+ tree
* @returns {Promise<*>}
*/
async loadHeader() {

if (this.header) {
Expand All @@ -298,7 +308,7 @@ class BWReader {
// Assume low-to-high unless proven otherwise
this.littleEndian = true

let binaryParser = new BinaryParser(new DataView(data), this.littleEndian)
const binaryParser = new BinaryParser(new DataView(data), this.littleEndian)
let magic = binaryParser.getUInt()
if (magic === BIGWIG_MAGIC_LTH) {
this.type = "bigwig"
Expand Down Expand Up @@ -335,66 +345,107 @@ class BWReader {
extensionOffset: binaryParser.getLong()
}

// Read the next chunk containing zoom headers, autosql, and total summary if present. TotalSummary size = 40 bytes
const startOffset = BBFILE_HEADER_SIZE
const size = header.totalSummaryOffset > 0 ?
header.totalSummaryOffset - startOffset + 40 :
Math.min(header.fullDataOffset, header.chromTreeOffset) - startOffset
let range = {
start: startOffset,
size: (header.fullDataOffset - startOffset + 4)
size: size
}
data = await this.loader.loadArrayBuffer(this.path, buildOptions(this.config, {range: range}))

const nZooms = header.nZoomLevels
binaryParser = new BinaryParser(new DataView(data), this.littleEndian)
const extHeaderParser = new BinaryParser(new DataView(data), this.littleEndian)

// Load zoom headers, store in order of decreasing reduction level (increasing resolution)
const nZooms = header.nZoomLevels
this.zoomLevelHeaders = []
this.firstZoomDataOffset = Number.MAX_SAFE_INTEGER
for (let i = 1; i <= nZooms; i++) {
const zoomNumber = nZooms - i
const zlh = new ZoomLevelHeader(zoomNumber, binaryParser)
const zlh = new ZoomLevelHeader(zoomNumber, extHeaderParser)
this.firstZoomDataOffset = Math.min(zlh.dataOffset, this.firstZoomDataOffset)
this.zoomLevelHeaders[zoomNumber] = zlh
}

// Autosql
if (header.autoSqlOffset > 0) {
binaryParser.position = header.autoSqlOffset - startOffset
const autoSqlString = binaryParser.getString()
extHeaderParser.position = header.autoSqlOffset - startOffset
const autoSqlString = extHeaderParser.getString()
if (autoSqlString) {
this.autoSql = parseAutoSQL(autoSqlString)
}
}

// Total summary
if (header.totalSummaryOffset > 0) {
binaryParser.position = header.totalSummaryOffset - startOffset
this.totalSummary = new BWTotalSummary(binaryParser)
}

// Chrom data index
if (header.chromTreeOffset > 0) {
binaryParser.position = header.chromTreeOffset - startOffset
this.chromTree = await ChromTree.parseTree(binaryParser, startOffset, this.genome)
this.chrNames = new Set(this.chromTree.idToName)
} else {
// TODO -- this is an error, not expected
throw "BigWig chromosome tree offset <= 0"
extHeaderParser.position = header.totalSummaryOffset - startOffset
this.totalSummary = new BWTotalSummary(extHeaderParser)
}

//Finally total data count
binaryParser.position = header.fullDataOffset - startOffset
header.dataCount = binaryParser.getInt()
// Chrom data index. The start is known, size is not, but we can estimate it
const bufferSize = Math.min(200000, Math.max(10000, header.fullDataOffset - header.chromTreeOffset))
this.chromTree = await this.#readChromTree(header.chromTreeOffset, bufferSize)
this.chrNames = new Set(this.chromTree.idToName)

this.featureDensity = header.dataCount / this.chromTree.sumLengths
// Estimate feature density from dataCount (bigbed only)
if("bigbed" === this.type) {
const dataCount = await this.#readDataCount(header.fullDataOffset)
this.featureDensity = dataCount / this.chromTree.sumLengths
}

this.header = header


//extension
if (header.extensionOffset > 0) {
await this.loadExtendedHeader(header.extensionOffset)
}
return this.header
return this.header
}
}

async #readDataCount(offset) {
const data = await this.loader.loadArrayBuffer(this.path, buildOptions(this.config, {
range: {
start: offset,
size: 4
}
}))
const binaryParser = new BinaryParser(new DataView(data), this.littleEndian)
return binaryParser.getInt()
}

/**
* Used when the chromTreeOffset is > fullDataOffset, that is when the chrom tree is not in the initial chunk
* read for parsing the header. We know the start position, but not the total size of the chrom tree
*
* @returns {Promise<void>}
*/
async #readChromTree(chromTreeOffset, bufferSize) {

let size = bufferSize
const load = async () => {
const data = await this.loader.loadArrayBuffer(this.path, buildOptions(this.config, {
range: {
start: chromTreeOffset,
size: size
}
}))
const binaryParser = new BinaryParser(new DataView(data), this.littleEndian)
return ChromTree.parseTree(binaryParser, chromTreeOffset, this.genome)
}

let error
while (size < 1000000) {
try {
const chromTree = await load()
return chromTree
} catch (e) {
error = e
size *= 2
}
}
throw (error)
}

async loadExtendedHeader(offset) {
Expand Down
3 changes: 1 addition & 2 deletions js/bigwig/chromTree.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ export default class ChromTree {
const idToName = []
let sumLengths = 0
const readTreeNode = (offset) => {

if (offset >= 0) binaryParser.position = offset
const type = binaryParser.getByte()
const reserved = binaryParser.getByte()
Expand Down Expand Up @@ -69,7 +68,7 @@ export default class ChromTree {
}

// Recursively walk tree to populate dictionary
readTreeNode(binaryParser, -1)
readTreeNode( -1)

return new ChromTree(header, nameToId, idToName, sumLengths)
}
Expand Down
14 changes: 14 additions & 0 deletions js/binary.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,20 @@ class BinaryParser {
this.length = dataView.byteLength
}

/**
* Print the first "n" bytes to the console. Used for debugging.
* @param n
*/
dumpBytes (n = 100) {
const pos = this.position
const bytes = []
for(let i=0; i<= n; i++) {
bytes.push(this.getByte())
}
console.log(bytes.join(" "))
this.setPosition(pos)
}

/**
 * Set the current read position (byte offset into the underlying DataView).
 * @param position new byte offset
 */
setPosition(position) {
this.position = position
}
Expand Down
16 changes: 14 additions & 2 deletions test/testBigwig.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ suite("testBigWig", function () {

this.timeout(10000)

//chr21:19,146,376-19,193,466
const url = "https://s3.amazonaws.com/igv.org.test/data/uncompressed.bw",
chr = "chr21",
start = 0,
Expand All @@ -21,14 +20,27 @@ suite("testBigWig", function () {
const bwReader = new BWReader({url: url})
const features = await bwReader.readFeatures(chr, start, chr, end, bpPerPixel)
assert.equal(features.length, 8) // Verified in iPad app
})


/**
 * Regression test for a BW file with an unusual layout, where the chrom tree is located
 * after the full data section (chromTreeOffset > fullDataOffset).
 */
test("chromTree", async function () {

    this.timeout(10000)

    const url = "https://data.broadinstitute.org/igvdata/test/data/bb/chromTreeTest.bigwig"
    const reader = new BWReader({url})
    const header = await reader.loadHeader()
    assert.ok(header)
    assert.equal(reader.chrNames.size, 6)
})

test("bigwig", async function () {

this.timeout(10000)

//chr21:19,146,376-19,193,466
const url = "test/data/bb/fixedStep.bw"
const chr = "chr1"
const bwReader = new BWReader({url: url})
Expand Down

0 comments on commit 46066e5

Please sign in to comment.