From a6919688c2d371d3c9fcada14e7b7a0c6a5a81dd Mon Sep 17 00:00:00 2001 From: Zhixiang Zhu Date: Mon, 21 May 2018 20:27:13 +0900 Subject: [PATCH] Optimized the performance of BEDReader. --- .../gear/family/pedigree/file/BEDReader.java | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/GEAR/src/gear/family/pedigree/file/BEDReader.java b/GEAR/src/gear/family/pedigree/file/BEDReader.java index 80b527e9..d5274cbe 100644 --- a/GEAR/src/gear/family/pedigree/file/BEDReader.java +++ b/GEAR/src/gear/family/pedigree/file/BEDReader.java @@ -6,6 +6,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; +import java.util.HashSet; import org.apache.commons.lang3.ArrayUtils; @@ -82,6 +83,8 @@ public void initial() { @Override public void parseLinkage(String infile, int numMarkerInFile, int[] WSNP) { + long startTime = System.nanoTime(); + initial(); pedfile = infile; BufferedInputStream in = null; @@ -103,6 +106,8 @@ public void parseLinkage(String infile, int numMarkerInFile, int[] WSNP) { individual_major(in, numMarkerInFile, WSNP); } in.close(); + long endTime = System.nanoTime(); + Logger.printUserLog(String.format("It takes %.1fs to read the data.", (endTime - startTime) / 1e9)); } catch (IOException e) { Logger.handleException(e, "An I/O exception occurred when reading the bed file."); } @@ -176,28 +181,27 @@ private static int[][] constructSnpMajorGenotypeByteConvertTable() { return table; } - private void snp_major(BufferedInputStream in, int numMarkerInFile, int[] WSNP) throws IOException { + private void snp_major(BufferedInputStream in, int numMarkerInFile, int[] wsnp) throws IOException { byte[] g = new byte[(n_individual + 3) / 4]; int[][] genoByteCvtTable = constructSnpMajorGenotypeByteConvertTable(); + + // Convert wsnp from array to set + HashSet wsnpSet = new HashSet(); + for (int snp : wsnp) + { + wsnpSet.add(snp); + } + int snpIdx = 0; for (int i = 0; i < numMarkerInFile; i++) { in.read(g, 0, g.length); - if (ArrayUtils.indexOf(WSNP, i) >= 0) { + if (wsnpSet.contains(i)) { int indIdx = 0; int posByte = snpIdx >> Person.shift; int posBit = (i & 0xf) << 1; for (int byteIdx = 0; byteIdx < g.length; ++byteIdx) { - int[] genoValues = genoByteCvtTable[g[byteIdx] & 0xff]; // 0xff - // is - // necessary - // here, - // otherwise - // Java - // will - // sign - // extend - // the - // byte + // 0xff is necessary here, otherwise Java will sign extend the byte + int[] genoValues = genoByteCvtTable[g[byteIdx] & 0xff]; for (int j = 0; j < 4 && indIdx < n_individual; ++j, ++indIdx) { persons.get(indIdx).addByteGenotype(genoValues[j], posByte, posBit); }