Skip to content

Commit

Permalink
Optimized the performance of BEDReader.
Browse files Browse the repository at this point in the history
  • Loading branch information
zzxiang committed May 21, 2018
1 parent 1ca8827 commit a691968
Showing 1 changed file with 17 additions and 13 deletions.
30 changes: 17 additions & 13 deletions GEAR/src/gear/family/pedigree/file/BEDReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;

import org.apache.commons.lang3.ArrayUtils;

Expand Down Expand Up @@ -82,6 +83,8 @@ public void initial() {

@Override
public void parseLinkage(String infile, int numMarkerInFile, int[] WSNP) {
long startTime = System.nanoTime();

initial();
pedfile = infile;
BufferedInputStream in = null;
Expand All @@ -103,6 +106,8 @@ public void parseLinkage(String infile, int numMarkerInFile, int[] WSNP) {
individual_major(in, numMarkerInFile, WSNP);
}
in.close();
long endTime = System.nanoTime();
Logger.printUserLog(String.format("It takes %.1fs to read the data.", (endTime - startTime) / 1e9));
} catch (IOException e) {
Logger.handleException(e, "An I/O exception occurred when reading the bed file.");
}
Expand Down Expand Up @@ -176,28 +181,27 @@ private static int[][] constructSnpMajorGenotypeByteConvertTable() {
return table;
}

private void snp_major(BufferedInputStream in, int numMarkerInFile, int[] WSNP) throws IOException {
private void snp_major(BufferedInputStream in, int numMarkerInFile, int[] wsnp) throws IOException {
byte[] g = new byte[(n_individual + 3) / 4];
int[][] genoByteCvtTable = constructSnpMajorGenotypeByteConvertTable();

// Convert wsnp from array to set
HashSet<Integer> wsnpSet = new HashSet<Integer>();
for (int snp : wsnp)
{
wsnpSet.add(snp);
}

int snpIdx = 0;
for (int i = 0; i < numMarkerInFile; i++) {
in.read(g, 0, g.length);
if (ArrayUtils.indexOf(WSNP, i) >= 0) {
if (wsnpSet.contains(i)) {
int indIdx = 0;
int posByte = snpIdx >> Person.shift;
int posBit = (i & 0xf) << 1;
for (int byteIdx = 0; byteIdx < g.length; ++byteIdx) {
int[] genoValues = genoByteCvtTable[g[byteIdx] & 0xff]; // 0xff
// is
// necessary
// here,
// otherwise
// Java
// will
// sign
// extend
// the
// byte
// 0xff is necessary here, otherwise Java will sign extend the byte
int[] genoValues = genoByteCvtTable[g[byteIdx] & 0xff];
for (int j = 0; j < 4 && indIdx < n_individual; ++j, ++indIdx) {
persons.get(indIdx).addByteGenotype(genoValues[j], posByte, posBit);
}
Expand Down

2 comments on commit a691968

@gc5k
Copy link
Owner

@gc5k gc5k commented on a691968 May 21, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

n=295, m=150K; 0.14s
n=988, m=919K; 15.4s
n=988, m=649K; 15.8s
n=2466, m=649K; 89.7s

@gc5k
Copy link
Owner

@gc5k gc5k commented on a691968 May 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HPC performance
n=295, m=150K; 0.14s
n=988, m=919K; 12s
n=988, m=649K; 9s
n=2466, m=649K; 25.3s

Please sign in to comment.