-
Notifications
You must be signed in to change notification settings - Fork 596
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
271 additions
and
129 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
64 changes: 64 additions & 0 deletions
64
src/main/java/org/broadinstitute/hellbender/tools/variantdb/BasicArrayData.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package org.broadinstitute.hellbender.tools.variantdb; | ||
|
||
import static org.broadinstitute.hellbender.tools.variantdb.BinaryUtils.*; | ||
|
||
import org.broadinstitute.hellbender.exceptions.GATKException; | ||
|
||
public class BasicArrayData { | ||
public static enum ArrayGenotype { | ||
// Order is critical here, the ordinal is the int encoding | ||
AA,AB, BB, NO_CALL | ||
} | ||
|
||
public int sampleId; | ||
public int probeId; | ||
public ArrayGenotype genotype; | ||
|
||
public static final int GT_LENGTH = 2; | ||
public static final int PROBE_ID_LENGTH = 30; | ||
public static final int MAX_PROBE_ID_VALUE = (int) Math.pow(2, PROBE_ID_LENGTH) - 1; | ||
|
||
public static final int SAMPLE_ID_LENGTH = 30; | ||
public static final int MAX_SAMPLE_ID_VALUE = (int) Math.pow(2, SAMPLE_ID_LENGTH) - 1; | ||
|
||
public static final int GT_OFFSET = 0; | ||
public static final int PROBE_ID_OFFSET = GT_OFFSET + GT_LENGTH; | ||
public static final int SAMPLE_ID_OFFSET = PROBE_ID_OFFSET + PROBE_ID_LENGTH; | ||
|
||
public BasicArrayData(int sampleId, int probeId, ArrayGenotype genotype) { | ||
// check that the sizes fit | ||
if (sampleId < 0 || sampleId > MAX_SAMPLE_ID_VALUE) { | ||
throw new GATKException("Attempted sample id of " + sampleId + " which is great than the maximum of " + MAX_SAMPLE_ID_VALUE); | ||
} | ||
|
||
if (probeId < 0 || probeId > MAX_PROBE_ID_VALUE) { | ||
throw new GATKException("Attempted sample id of " + probeId + " which is great than the maximum of " + MAX_PROBE_ID_VALUE); | ||
} | ||
|
||
this.sampleId = sampleId; | ||
this.probeId = probeId; | ||
this.genotype = genotype; | ||
} | ||
|
||
public BasicArrayData(long bits) { | ||
this.genotype = decodeGenotype((int) extractBits(bits, GT_OFFSET, GT_LENGTH)); | ||
this.probeId = (int) extractBits(bits, PROBE_ID_OFFSET, PROBE_ID_LENGTH); | ||
this.sampleId = (int) extractBits(bits, SAMPLE_ID_OFFSET, SAMPLE_ID_LENGTH); | ||
} | ||
|
||
private ArrayGenotype decodeGenotype(int i) { | ||
return ArrayGenotype.values()[i]; | ||
} | ||
|
||
private int encodeGenotype(ArrayGenotype g) { | ||
return g.ordinal(); | ||
} | ||
|
||
public long encode() { | ||
return ( | ||
((long) encodeGenotype(this.genotype) << GT_OFFSET) | | ||
((long) this.probeId << PROBE_ID_OFFSET ) | | ||
((long) this.sampleId << SAMPLE_ID_OFFSET ) | ||
); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
133 changes: 83 additions & 50 deletions
133
src/main/java/org/broadinstitute/hellbender/tools/variantdb/RawArrayData.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,76 +1,109 @@ | ||
package org.broadinstitute.hellbender.tools.variantdb; | ||
|
||
import java.math.*; | ||
import static org.broadinstitute.hellbender.tools.variantdb.BinaryUtils.*; | ||
|
||
public class RawArrayData { | ||
public static enum ArrayGenotype { | ||
// Order is critical here, the ordinal is the int encoding | ||
AA,AB, BB, NO_CALL | ||
} | ||
|
||
// TODO: turn these all into getters/setters with precision checks (e.g. baf) | ||
int probeId; | ||
ArrayGenotype genotype; | ||
Float normx; | ||
Float normy; | ||
Float baf; | ||
Float lrr; | ||
|
||
static ArrayGenotype decodeGenotype(int i) { | ||
return ArrayGenotype.values()[i]; | ||
public Float normx; | ||
public Float normy; | ||
public Float baf; | ||
public Float lrr; | ||
|
||
public RawArrayData(Float normx, Float normy, Float baf, Float lrr) { | ||
this.normx = normx; | ||
this.normy = normy; | ||
this.baf = baf; | ||
this.lrr = lrr; | ||
} | ||
|
||
static int encodeGenotype(ArrayGenotype g) { | ||
return g.ordinal(); | ||
public static final int NORMX_OFFSET = 0; | ||
public static final int NORMY_OFFSET = 16; | ||
public static final int LRR_OFFSET = 32; | ||
public static final int BAF_OFFSET = 48; | ||
|
||
private static final int MIN_16_BIT_VALUE = 0; | ||
private static final int MAX_16_BIT_VALUE = (int) Math.pow(2, 16) - 2; // reserve for null | ||
private static final int NULL_ENCODING = MAX_16_BIT_VALUE + 1; | ||
|
||
private static final int MIN_10_BIT_VALUE = 0; | ||
private static final int MAX_10_BIT_VALUE = (int) Math.pow(2, 10) - 2; // reserve for null | ||
private static final int NULL_10_BIT_ENCODING = MAX_10_BIT_VALUE + 1; | ||
|
||
// store a float with 3 decimal digits of precision in 16 bits by | ||
// multiplying by 1000 and clamping to the representable range, reserving | ||
// 0xFFFF (the largest 16-bit value) to represent null | ||
public static int encode(Float f) { | ||
return encode(f,0); | ||
} | ||
public static int encode(Float f, float offset) { | ||
|
||
// TODO: fix to be 10-bit null encoding also... | ||
if (f == null) return NULL_ENCODING; | ||
|
||
public static final int LRR_OFFSET = 0; | ||
public static final float LRR_MIN = -28; | ||
public static final float LRR_MAX = 7; | ||
return | ||
Math.min( | ||
Math.max( | ||
Math.round((f+offset) * 1000.0f), | ||
MIN_16_BIT_VALUE | ||
), | ||
MAX_16_BIT_VALUE | ||
); | ||
} | ||
|
||
public static final int BAF_OFFSET = 8; | ||
public static final float BAF_MIN = 0; | ||
public static final float BAF_MAX = 1; | ||
public static int encode10bits(Float f, float offset) { | ||
|
||
public static final int NORMX_OFFSET = 16; | ||
public static final float NORMX_MIN = 0; | ||
public static final float NORMX_MAX = 8; | ||
if (f == null) return NULL_10_BIT_ENCODING; | ||
|
||
public static final int NORMY_OFFSET = 24; | ||
public static final float NORMY_MIN = 0; | ||
public static final float NORMY_MAX = 8; | ||
return | ||
Math.min( | ||
Math.max( | ||
Math.round((f+offset) * 1000.0f), | ||
MIN_10_BIT_VALUE | ||
), | ||
MAX_10_BIT_VALUE | ||
); | ||
} | ||
|
||
public static final int GT_OFFSET = 32; | ||
public static final int PROBE_ID_OFFSET = 42; | ||
public static Float decode(long i) { | ||
return decode(i, 0); | ||
} | ||
|
||
// GTC Data Ranges: https://github.com/Illumina/BeadArrayFiles/blob/develop/docs/GTC_File_Format_v5.pdf | ||
public static RawArrayData decode(long bits) { | ||
public static Float decode(long i, float offset) { | ||
if (i == NULL_ENCODING) return null; | ||
return ( | ||
(float) i) / 1000.0f - offset; | ||
} | ||
|
||
RawArrayData data = new RawArrayData(); | ||
data.lrr = decodeFrom8Bits((int) extractBits(bits, LRR_OFFSET, 8), LRR_MIN, LRR_MAX); | ||
data.baf = decodeFrom8Bits((int) extractBits(bits, BAF_OFFSET, 8), BAF_MIN, BAF_MAX); | ||
data.normx = decodeFrom8Bits((int) extractBits(bits, NORMX_OFFSET, 8), NORMX_MIN, NORMX_MAX); | ||
data.normy = decodeFrom8Bits((int) extractBits(bits, NORMY_OFFSET, 8), NORMY_MIN, NORMY_MAX); | ||
data.genotype = decodeGenotype((int) extractBits(bits, GT_OFFSET, 2)); | ||
data.probeId = (int) extractBits(bits, PROBE_ID_OFFSET, 22); | ||
public static Float decode10bits(long i) { | ||
if (i == NULL_10_BIT_ENCODING) return null; | ||
return ( | ||
(float) i) / 1000.0f; | ||
} | ||
|
||
return data; | ||
public RawArrayData(long bits) { | ||
try { | ||
this.normx = decode(extractBits(bits, NORMX_OFFSET, 16)); | ||
this.normy = decode(extractBits(bits, NORMY_OFFSET, 16)); | ||
this.lrr = decode(extractBits(bits, LRR_OFFSET, 16), 32.0f); | ||
this.baf = decode10bits(extractBits(bits, BAF_OFFSET, 10)); | ||
} catch (NullPointerException npe) { | ||
npe.printStackTrace(); | ||
throw npe; | ||
} | ||
} | ||
|
||
public long encode() { | ||
long lrrBits = encodeTo8Bits(this.lrr, LRR_MIN, LRR_MAX); | ||
long bafBits = encodeTo8Bits(this.baf, BAF_MIN, BAF_MAX); | ||
long normxBits = encodeTo8Bits(this.normx, NORMX_MIN, NORMX_MAX); | ||
long normyBits = encodeTo8Bits(this.normy, NORMX_MIN, NORMX_MAX); | ||
long gtBits = (long) encodeGenotype(this.genotype); | ||
long normxBits = encode(this.normx); | ||
long normyBits = encode(this.normy); | ||
long lrrBits = encode(this.lrr, 32.0f); | ||
long bafBits = encode10bits(this.baf, 0.0f); | ||
|
||
return ( | ||
(lrrBits << LRR_OFFSET) | | ||
(bafBits << BAF_OFFSET) | | ||
(normxBits << NORMX_OFFSET) | | ||
(normxBits << NORMX_OFFSET) | | ||
(normyBits << NORMY_OFFSET) | | ||
(gtBits << GT_OFFSET) | | ||
((long) this.probeId << PROBE_ID_OFFSET ) | ||
(lrrBits << LRR_OFFSET) | | ||
(bafBits << BAF_OFFSET) | ||
); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.