Skip to content

Commit

Permalink
Reduce specialization in ForUtil and ForDeltaUtil. (#14048)
Browse files Browse the repository at this point in the history
These classes specialize all bits per value up to 24. But performance of high
numbers of bits per value is not very important, because they are used by short
postings lists, which are fast to iterate anyway. So this PR only specializes
up to 16 bits per value.

For instance, if a postings list uses blocks of 17 bits per value, it means
that one can find gaps of 65,536 consecutive doc IDs that do not contain the
term. Such rare terms do not drive query performance.
  • Loading branch information
jpountz committed Dec 7, 2024
1 parent ce55f59 commit f6a9be4
Show file tree
Hide file tree
Showing 6 changed files with 6 additions and 346 deletions.
4 changes: 2 additions & 2 deletions lucene/core/src/generated/checksums/generateForDeltaUtil.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForDeltaUtil.java": "0ff7fb9159693055d9e4b9468b004166156f6550",
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/gen_ForDeltaUtil.py": "8c55b7aaced028388408c5eb968b1f1197e11142"
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForDeltaUtil.java": "e0bf6071bcdefaa297e0bb92f79615201777652d",
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/gen_ForDeltaUtil.py": "d7484ab18da33e5cb73faaf84b4e2bb832b62f9d"
}
4 changes: 2 additions & 2 deletions lucene/core/src/generated/checksums/generateForUtil.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForUtil.java": "10ceb79f031232bc1e4564db7e3ebb16eedd2e0a",
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/gen_ForUtil.py": "d69e734bce30375952046a3776bbb7a5c1edbd51"
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForUtil.java": "3004112150943413e0f7fcc3e56b74c4875c1d64",
"lucene/core/src/java/org/apache/lucene/codecs/lucene101/gen_ForUtil.py": "b1041b6b46caab789c04d99483ee016d550eeebc"
}
Original file line number Diff line number Diff line change
Expand Up @@ -306,38 +306,6 @@ void decodeAndPrefixSum(int bitsPerValue, PostingDecodingUtil pdu, int base, int
decode16To32(pdu, ints);
prefixSum32(ints, base);
break;
case 17:
decode17(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 18:
decode18(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 19:
decode19(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 20:
decode20(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 21:
decode21(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 22:
decode22(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 23:
decode23(pdu, tmp, ints);
prefixSum32(ints, base);
break;
case 24:
decode24(pdu, tmp, ints);
prefixSum32(ints, base);
break;
default:
decodeSlow(bitsPerValue, pdu, tmp, ints);
prefixSum32(ints, base);
Expand Down
308 changes: 0 additions & 308 deletions lucene/core/src/java/org/apache/lucene/codecs/lucene101/ForUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -245,14 +245,6 @@ static void decodeSlow(int bitsPerValue, PostingDecodingUtil pdu, int[] tmp, int
static final int MASK32_14 = MASKS32[14];
static final int MASK32_15 = MASKS32[15];
static final int MASK32_16 = MASKS32[16];
static final int MASK32_17 = MASKS32[17];
static final int MASK32_18 = MASKS32[18];
static final int MASK32_19 = MASKS32[19];
static final int MASK32_20 = MASKS32[20];
static final int MASK32_21 = MASKS32[21];
static final int MASK32_22 = MASKS32[22];
static final int MASK32_23 = MASKS32[23];
static final int MASK32_24 = MASKS32[24];

/** Decode 128 integers into {@code ints}. */
void decode(int bitsPerValue, PostingDecodingUtil pdu, int[] ints) throws IOException {
Expand Down Expand Up @@ -321,30 +313,6 @@ void decode(int bitsPerValue, PostingDecodingUtil pdu, int[] ints) throws IOExce
decode16(pdu, ints);
expand16(ints);
break;
case 17:
decode17(pdu, tmp, ints);
break;
case 18:
decode18(pdu, tmp, ints);
break;
case 19:
decode19(pdu, tmp, ints);
break;
case 20:
decode20(pdu, tmp, ints);
break;
case 21:
decode21(pdu, tmp, ints);
break;
case 22:
decode22(pdu, tmp, ints);
break;
case 23:
decode23(pdu, tmp, ints);
break;
case 24:
decode24(pdu, tmp, ints);
break;
default:
decodeSlow(bitsPerValue, pdu, tmp, ints);
break;
Expand Down Expand Up @@ -562,280 +530,4 @@ static void decode15(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOEx
/**
 * Specialized decoder for 16 bits per value: issues one bulk read of 64 ints from the input
 * wrapped by {@code pdu} into {@code ints}, starting at index 0.
 *
 * <p>NOTE(review): the visible {@code decode} switch follows this call with {@code
 * expand16(ints)}; presumably each int read here carries two packed 16-bit values -- confirm.
 *
 * @param pdu decoding helper whose {@code in} field is the data source
 * @param ints destination array; indices 0..63 are filled by the read
 * @throws IOException if the underlying read fails
 */
static void decode16(PostingDecodingUtil pdu, int[] ints) throws IOException {
  final int packedIntCount = 64;
  pdu.in.readInts(ints, 0, packedIntCount);
}

/**
 * Specialized decoder for blocks packed with 17 bits per value.
 *
 * <p>Calls {@code pdu.splitInts} first, then fills {@code ints[68..127]} (4 iterations x 15
 * outputs) by combining consecutive {@code tmp} entries with shifts and masks.
 *
 * <p>NOTE(review): presumably {@code splitInts} stores the first 68 decoded values in {@code
 * ints[0..67]} and leaves 15-bit remainders in {@code tmp[0..67]} -- confirm against
 * PostingDecodingUtil.
 *
 * @param pdu decoding helper on which {@code splitInts} is invoked
 * @param tmp scratch buffer; entries 0..67 are read by the stitching loop
 * @param ints destination array; indices 68..127 are written by the stitching loop
 * @throws IOException declared for the underlying read
 */
static void decode17(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(68, ints, 15, 17, MASK32_17, tmp, 0, MASK32_15);
// 4 iterations, each reading 17 entries of tmp and writing 15 entries of ints.
for (int iter = 0, tmpIdx = 0, intsIdx = 68; iter < 4; ++iter, tmpIdx += 17, intsIdx += 15) {
// Assuming 15 significant bits per tmp entry: 15 bits shifted up by 2, plus 2 bits of the next.
int l0 = tmp[tmpIdx + 0] << 2;
l0 |= (tmp[tmpIdx + 1] >>> 13) & MASK32_2;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 1] & MASK32_13) << 4;
l1 |= (tmp[tmpIdx + 2] >>> 11) & MASK32_4;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 2] & MASK32_11) << 6;
l2 |= (tmp[tmpIdx + 3] >>> 9) & MASK32_6;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 3] & MASK32_9) << 8;
l3 |= (tmp[tmpIdx + 4] >>> 7) & MASK32_8;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 4] & MASK32_7) << 10;
l4 |= (tmp[tmpIdx + 5] >>> 5) & MASK32_10;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 5] & MASK32_5) << 12;
l5 |= (tmp[tmpIdx + 6] >>> 3) & MASK32_12;
ints[intsIdx + 5] = l5;
int l6 = (tmp[tmpIdx + 6] & MASK32_3) << 14;
l6 |= (tmp[tmpIdx + 7] >>> 1) & MASK32_14;
ints[intsIdx + 6] = l6;
// This output spans three tmp entries: the low bit of one, a whole entry, and bit 14 of the next.
int l7 = (tmp[tmpIdx + 7] & MASK32_1) << 16;
l7 |= tmp[tmpIdx + 8] << 1;
l7 |= (tmp[tmpIdx + 9] >>> 14) & MASK32_1;
ints[intsIdx + 7] = l7;
int l8 = (tmp[tmpIdx + 9] & MASK32_14) << 3;
l8 |= (tmp[tmpIdx + 10] >>> 12) & MASK32_3;
ints[intsIdx + 8] = l8;
int l9 = (tmp[tmpIdx + 10] & MASK32_12) << 5;
l9 |= (tmp[tmpIdx + 11] >>> 10) & MASK32_5;
ints[intsIdx + 9] = l9;
int l10 = (tmp[tmpIdx + 11] & MASK32_10) << 7;
l10 |= (tmp[tmpIdx + 12] >>> 8) & MASK32_7;
ints[intsIdx + 10] = l10;
int l11 = (tmp[tmpIdx + 12] & MASK32_8) << 9;
l11 |= (tmp[tmpIdx + 13] >>> 6) & MASK32_9;
ints[intsIdx + 11] = l11;
int l12 = (tmp[tmpIdx + 13] & MASK32_6) << 11;
l12 |= (tmp[tmpIdx + 14] >>> 4) & MASK32_11;
ints[intsIdx + 12] = l12;
int l13 = (tmp[tmpIdx + 14] & MASK32_4) << 13;
l13 |= (tmp[tmpIdx + 15] >>> 2) & MASK32_13;
ints[intsIdx + 13] = l13;
// The last output of the group takes the final tmp entry unshifted.
int l14 = (tmp[tmpIdx + 15] & MASK32_2) << 15;
l14 |= tmp[tmpIdx + 16] << 0;
ints[intsIdx + 14] = l14;
}
}

/**
 * Specialized decoder for blocks packed with 18 bits per value.
 *
 * <p>Calls {@code pdu.splitInts} first, then fills {@code ints[72..127]} (8 iterations x 7
 * outputs) by combining consecutive {@code tmp} entries with shifts and masks.
 *
 * <p>NOTE(review): presumably {@code splitInts} stores the first 72 decoded values in {@code
 * ints[0..71]} and leaves 14-bit remainders in {@code tmp[0..71]} -- confirm against
 * PostingDecodingUtil.
 *
 * @param pdu decoding helper on which {@code splitInts} is invoked
 * @param tmp scratch buffer; entries 0..71 are read by the stitching loop
 * @param ints destination array; indices 72..127 are written by the stitching loop
 * @throws IOException declared for the underlying read
 */
static void decode18(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(72, ints, 14, 18, MASK32_18, tmp, 0, MASK32_14);
// 8 iterations, each reading 9 entries of tmp and writing 7 entries of ints.
for (int iter = 0, tmpIdx = 0, intsIdx = 72; iter < 8; ++iter, tmpIdx += 9, intsIdx += 7) {
// Assuming 14 significant bits per tmp entry: 14 bits shifted up by 4, plus 4 bits of the next.
int l0 = tmp[tmpIdx + 0] << 4;
l0 |= (tmp[tmpIdx + 1] >>> 10) & MASK32_4;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 1] & MASK32_10) << 8;
l1 |= (tmp[tmpIdx + 2] >>> 6) & MASK32_8;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 2] & MASK32_6) << 12;
l2 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_12;
ints[intsIdx + 2] = l2;
// This output spans three tmp entries: 2 low bits, a whole entry, then 2 bits of the next.
int l3 = (tmp[tmpIdx + 3] & MASK32_2) << 16;
l3 |= tmp[tmpIdx + 4] << 2;
l3 |= (tmp[tmpIdx + 5] >>> 12) & MASK32_2;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 5] & MASK32_12) << 6;
l4 |= (tmp[tmpIdx + 6] >>> 8) & MASK32_6;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 6] & MASK32_8) << 10;
l5 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_10;
ints[intsIdx + 5] = l5;
// The last output of the group takes the final tmp entry unshifted.
int l6 = (tmp[tmpIdx + 7] & MASK32_4) << 14;
l6 |= tmp[tmpIdx + 8] << 0;
ints[intsIdx + 6] = l6;
}
}

/**
 * Specialized decoder for blocks packed with 19 bits per value.
 *
 * <p>Calls {@code pdu.splitInts} first, then fills {@code ints[76..127]} (4 iterations x 13
 * outputs) by combining consecutive {@code tmp} entries with shifts and masks.
 *
 * <p>NOTE(review): presumably {@code splitInts} stores the first 76 decoded values in {@code
 * ints[0..75]} and leaves 13-bit remainders in {@code tmp[0..75]} -- confirm against
 * PostingDecodingUtil.
 *
 * @param pdu decoding helper on which {@code splitInts} is invoked
 * @param tmp scratch buffer; entries 0..75 are read by the stitching loop
 * @param ints destination array; indices 76..127 are written by the stitching loop
 * @throws IOException declared for the underlying read
 */
static void decode19(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(76, ints, 13, 19, MASK32_19, tmp, 0, MASK32_13);
// 4 iterations, each reading 19 entries of tmp and writing 13 entries of ints.
for (int iter = 0, tmpIdx = 0, intsIdx = 76; iter < 4; ++iter, tmpIdx += 19, intsIdx += 13) {
// Assuming 13 significant bits per tmp entry: 13 bits shifted up by 6, plus 6 bits of the next.
int l0 = tmp[tmpIdx + 0] << 6;
l0 |= (tmp[tmpIdx + 1] >>> 7) & MASK32_6;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 1] & MASK32_7) << 12;
l1 |= (tmp[tmpIdx + 2] >>> 1) & MASK32_12;
ints[intsIdx + 1] = l1;
// Outputs built from three tmp entries use a whole middle entry between two partial ones.
int l2 = (tmp[tmpIdx + 2] & MASK32_1) << 18;
l2 |= tmp[tmpIdx + 3] << 5;
l2 |= (tmp[tmpIdx + 4] >>> 8) & MASK32_5;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 4] & MASK32_8) << 11;
l3 |= (tmp[tmpIdx + 5] >>> 2) & MASK32_11;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 5] & MASK32_2) << 17;
l4 |= tmp[tmpIdx + 6] << 4;
l4 |= (tmp[tmpIdx + 7] >>> 9) & MASK32_4;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 7] & MASK32_9) << 10;
l5 |= (tmp[tmpIdx + 8] >>> 3) & MASK32_10;
ints[intsIdx + 5] = l5;
int l6 = (tmp[tmpIdx + 8] & MASK32_3) << 16;
l6 |= tmp[tmpIdx + 9] << 3;
l6 |= (tmp[tmpIdx + 10] >>> 10) & MASK32_3;
ints[intsIdx + 6] = l6;
int l7 = (tmp[tmpIdx + 10] & MASK32_10) << 9;
l7 |= (tmp[tmpIdx + 11] >>> 4) & MASK32_9;
ints[intsIdx + 7] = l7;
int l8 = (tmp[tmpIdx + 11] & MASK32_4) << 15;
l8 |= tmp[tmpIdx + 12] << 2;
l8 |= (tmp[tmpIdx + 13] >>> 11) & MASK32_2;
ints[intsIdx + 8] = l8;
int l9 = (tmp[tmpIdx + 13] & MASK32_11) << 8;
l9 |= (tmp[tmpIdx + 14] >>> 5) & MASK32_8;
ints[intsIdx + 9] = l9;
int l10 = (tmp[tmpIdx + 14] & MASK32_5) << 14;
l10 |= tmp[tmpIdx + 15] << 1;
l10 |= (tmp[tmpIdx + 16] >>> 12) & MASK32_1;
ints[intsIdx + 10] = l10;
int l11 = (tmp[tmpIdx + 16] & MASK32_12) << 7;
l11 |= (tmp[tmpIdx + 17] >>> 6) & MASK32_7;
ints[intsIdx + 11] = l11;
// The last output of the group takes the final tmp entry unshifted.
int l12 = (tmp[tmpIdx + 17] & MASK32_6) << 13;
l12 |= tmp[tmpIdx + 18] << 0;
ints[intsIdx + 12] = l12;
}
}

/**
 * Specialized decoder for blocks packed with 20 bits per value.
 *
 * <p>After delegating to {@code pdu.splitInts}, rebuilds {@code ints[80..127]} from {@code
 * tmp[0..79]}: every 5 consecutive {@code tmp} entries yield 3 outputs.
 *
 * <p>NOTE(review): presumably {@code splitInts} stores the first 80 decoded values in {@code
 * ints[0..79]} and leaves 12-bit remainders in {@code tmp[0..79]} -- confirm against
 * PostingDecodingUtil.
 *
 * @param pdu decoding helper on which {@code splitInts} is invoked
 * @param tmp scratch buffer; entries 0..79 are read by the stitching loop
 * @param ints destination array; indices 80..127 are written by the stitching loop
 * @throws IOException declared for the underlying read
 */
static void decode20(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
  pdu.splitInts(80, ints, 12, 20, MASK32_20, tmp, 0, MASK32_12);
  int dst = 80;
  // 16 groups: src advances by 5 over tmp[0..79], dst advances by 3 over ints[80..127].
  for (int src = 0; src < 80; src += 5) {
    // Whole entry shifted up by 8, plus the bits above the low 4 of the next entry.
    ints[dst] = (tmp[src] << 8) | ((tmp[src + 1] >>> 4) & MASK32_8);
    // Low 4 bits of one entry, a whole entry, and the bits above the low 8 of the next.
    ints[dst + 1] =
        ((tmp[src + 1] & MASK32_4) << 16)
            | (tmp[src + 2] << 4)
            | ((tmp[src + 3] >>> 8) & MASK32_4);
    // Low 8 bits of one entry followed by the last entry of the group, unshifted.
    ints[dst + 2] = ((tmp[src + 3] & MASK32_8) << 12) | tmp[src + 4];
    dst += 3;
  }
}

/**
 * Specialized decoder for blocks packed with 21 bits per value.
 *
 * <p>Calls {@code pdu.splitInts} first, then fills {@code ints[84..127]} (4 iterations x 11
 * outputs) by combining consecutive {@code tmp} entries with shifts and masks.
 *
 * <p>NOTE(review): presumably {@code splitInts} stores the first 84 decoded values in {@code
 * ints[0..83]} and leaves 11-bit remainders in {@code tmp[0..83]} -- confirm against
 * PostingDecodingUtil.
 *
 * @param pdu decoding helper on which {@code splitInts} is invoked
 * @param tmp scratch buffer; entries 0..83 are read by the stitching loop
 * @param ints destination array; indices 84..127 are written by the stitching loop
 * @throws IOException declared for the underlying read
 */
static void decode21(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(84, ints, 11, 21, MASK32_21, tmp, 0, MASK32_11);
// 4 iterations, each reading 21 entries of tmp and writing 11 entries of ints.
for (int iter = 0, tmpIdx = 0, intsIdx = 84; iter < 4; ++iter, tmpIdx += 21, intsIdx += 11) {
// Assuming 11 significant bits per tmp entry: 11 bits shifted up by 10, plus 10 bits of the next.
int l0 = tmp[tmpIdx + 0] << 10;
l0 |= (tmp[tmpIdx + 1] >>> 1) & MASK32_10;
ints[intsIdx + 0] = l0;
// Every following output except the last spans three tmp entries (partial, whole, partial).
int l1 = (tmp[tmpIdx + 1] & MASK32_1) << 20;
l1 |= tmp[tmpIdx + 2] << 9;
l1 |= (tmp[tmpIdx + 3] >>> 2) & MASK32_9;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 3] & MASK32_2) << 19;
l2 |= tmp[tmpIdx + 4] << 8;
l2 |= (tmp[tmpIdx + 5] >>> 3) & MASK32_8;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 5] & MASK32_3) << 18;
l3 |= tmp[tmpIdx + 6] << 7;
l3 |= (tmp[tmpIdx + 7] >>> 4) & MASK32_7;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 7] & MASK32_4) << 17;
l4 |= tmp[tmpIdx + 8] << 6;
l4 |= (tmp[tmpIdx + 9] >>> 5) & MASK32_6;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 9] & MASK32_5) << 16;
l5 |= tmp[tmpIdx + 10] << 5;
l5 |= (tmp[tmpIdx + 11] >>> 6) & MASK32_5;
ints[intsIdx + 5] = l5;
int l6 = (tmp[tmpIdx + 11] & MASK32_6) << 15;
l6 |= tmp[tmpIdx + 12] << 4;
l6 |= (tmp[tmpIdx + 13] >>> 7) & MASK32_4;
ints[intsIdx + 6] = l6;
int l7 = (tmp[tmpIdx + 13] & MASK32_7) << 14;
l7 |= tmp[tmpIdx + 14] << 3;
l7 |= (tmp[tmpIdx + 15] >>> 8) & MASK32_3;
ints[intsIdx + 7] = l7;
int l8 = (tmp[tmpIdx + 15] & MASK32_8) << 13;
l8 |= tmp[tmpIdx + 16] << 2;
l8 |= (tmp[tmpIdx + 17] >>> 9) & MASK32_2;
ints[intsIdx + 8] = l8;
int l9 = (tmp[tmpIdx + 17] & MASK32_9) << 12;
l9 |= tmp[tmpIdx + 18] << 1;
l9 |= (tmp[tmpIdx + 19] >>> 10) & MASK32_1;
ints[intsIdx + 9] = l9;
// The last output of the group takes the final tmp entry unshifted.
int l10 = (tmp[tmpIdx + 19] & MASK32_10) << 11;
l10 |= tmp[tmpIdx + 20] << 0;
ints[intsIdx + 10] = l10;
}
}

/**
 * Specialized decoder for blocks packed with 22 bits per value.
 *
 * <p>Calls {@code pdu.splitInts} first, then fills {@code ints[88..127]} (8 iterations x 5
 * outputs) by combining consecutive {@code tmp} entries with shifts and masks.
 *
 * <p>NOTE(review): presumably {@code splitInts} stores the first 88 decoded values in {@code
 * ints[0..87]} and leaves 10-bit remainders in {@code tmp[0..87]} -- confirm against
 * PostingDecodingUtil.
 *
 * @param pdu decoding helper on which {@code splitInts} is invoked
 * @param tmp scratch buffer; entries 0..87 are read by the stitching loop
 * @param ints destination array; indices 88..127 are written by the stitching loop
 * @throws IOException declared for the underlying read
 */
static void decode22(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(88, ints, 10, 22, MASK32_22, tmp, 0, MASK32_10);
// 8 iterations, each reading 11 entries of tmp and writing 5 entries of ints.
for (int iter = 0, tmpIdx = 0, intsIdx = 88; iter < 8; ++iter, tmpIdx += 11, intsIdx += 5) {
// Assuming 10 significant bits per tmp entry: two whole entries plus 2 bits of a third.
int l0 = tmp[tmpIdx + 0] << 12;
l0 |= tmp[tmpIdx + 1] << 2;
l0 |= (tmp[tmpIdx + 2] >>> 8) & MASK32_2;
ints[intsIdx + 0] = l0;
int l1 = (tmp[tmpIdx + 2] & MASK32_8) << 14;
l1 |= tmp[tmpIdx + 3] << 4;
l1 |= (tmp[tmpIdx + 4] >>> 6) & MASK32_4;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 4] & MASK32_6) << 16;
l2 |= tmp[tmpIdx + 5] << 6;
l2 |= (tmp[tmpIdx + 6] >>> 4) & MASK32_6;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 6] & MASK32_4) << 18;
l3 |= tmp[tmpIdx + 7] << 8;
l3 |= (tmp[tmpIdx + 8] >>> 2) & MASK32_8;
ints[intsIdx + 3] = l3;
// The last output of the group ends with two whole entries, the final one unshifted.
int l4 = (tmp[tmpIdx + 8] & MASK32_2) << 20;
l4 |= tmp[tmpIdx + 9] << 10;
l4 |= tmp[tmpIdx + 10] << 0;
ints[intsIdx + 4] = l4;
}
}

/**
 * Specialized decoder for blocks packed with 23 bits per value.
 *
 * <p>Calls {@code pdu.splitInts} first, then fills {@code ints[92..127]} (4 iterations x 9
 * outputs) by combining consecutive {@code tmp} entries with shifts and masks.
 *
 * <p>NOTE(review): presumably {@code splitInts} stores the first 92 decoded values in {@code
 * ints[0..91]} and leaves 9-bit remainders in {@code tmp[0..91]} -- confirm against
 * PostingDecodingUtil.
 *
 * @param pdu decoding helper on which {@code splitInts} is invoked
 * @param tmp scratch buffer; entries 0..91 are read by the stitching loop
 * @param ints destination array; indices 92..127 are written by the stitching loop
 * @throws IOException declared for the underlying read
 */
static void decode23(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
pdu.splitInts(92, ints, 9, 23, MASK32_23, tmp, 0, MASK32_9);
// 4 iterations, each reading 23 entries of tmp and writing 9 entries of ints.
for (int iter = 0, tmpIdx = 0, intsIdx = 92; iter < 4; ++iter, tmpIdx += 23, intsIdx += 9) {
// Assuming 9 significant bits per tmp entry: two whole entries plus 5 bits of a third.
int l0 = tmp[tmpIdx + 0] << 14;
l0 |= tmp[tmpIdx + 1] << 5;
l0 |= (tmp[tmpIdx + 2] >>> 4) & MASK32_5;
ints[intsIdx + 0] = l0;
// Some outputs span four tmp entries: a partial, two whole entries, and another partial.
int l1 = (tmp[tmpIdx + 2] & MASK32_4) << 19;
l1 |= tmp[tmpIdx + 3] << 10;
l1 |= tmp[tmpIdx + 4] << 1;
l1 |= (tmp[tmpIdx + 5] >>> 8) & MASK32_1;
ints[intsIdx + 1] = l1;
int l2 = (tmp[tmpIdx + 5] & MASK32_8) << 15;
l2 |= tmp[tmpIdx + 6] << 6;
l2 |= (tmp[tmpIdx + 7] >>> 3) & MASK32_6;
ints[intsIdx + 2] = l2;
int l3 = (tmp[tmpIdx + 7] & MASK32_3) << 20;
l3 |= tmp[tmpIdx + 8] << 11;
l3 |= tmp[tmpIdx + 9] << 2;
l3 |= (tmp[tmpIdx + 10] >>> 7) & MASK32_2;
ints[intsIdx + 3] = l3;
int l4 = (tmp[tmpIdx + 10] & MASK32_7) << 16;
l4 |= tmp[tmpIdx + 11] << 7;
l4 |= (tmp[tmpIdx + 12] >>> 2) & MASK32_7;
ints[intsIdx + 4] = l4;
int l5 = (tmp[tmpIdx + 12] & MASK32_2) << 21;
l5 |= tmp[tmpIdx + 13] << 12;
l5 |= tmp[tmpIdx + 14] << 3;
l5 |= (tmp[tmpIdx + 15] >>> 6) & MASK32_3;
ints[intsIdx + 5] = l5;
int l6 = (tmp[tmpIdx + 15] & MASK32_6) << 17;
l6 |= tmp[tmpIdx + 16] << 8;
l6 |= (tmp[tmpIdx + 17] >>> 1) & MASK32_8;
ints[intsIdx + 6] = l6;
int l7 = (tmp[tmpIdx + 17] & MASK32_1) << 22;
l7 |= tmp[tmpIdx + 18] << 13;
l7 |= tmp[tmpIdx + 19] << 4;
l7 |= (tmp[tmpIdx + 20] >>> 5) & MASK32_4;
ints[intsIdx + 7] = l7;
// The last output of the group ends with two whole entries, the final one unshifted.
int l8 = (tmp[tmpIdx + 20] & MASK32_5) << 18;
l8 |= tmp[tmpIdx + 21] << 9;
l8 |= tmp[tmpIdx + 22] << 0;
ints[intsIdx + 8] = l8;
}
}

/**
 * Specialized decoder for blocks packed with 24 bits per value.
 *
 * <p>After delegating to {@code pdu.splitInts}, rebuilds {@code ints[96..127]} from {@code
 * tmp[0..95]}: every 3 consecutive {@code tmp} entries are concatenated into one output.
 *
 * <p>NOTE(review): presumably {@code splitInts} stores the first 96 decoded values in {@code
 * ints[0..95]} and leaves 8-bit remainders in {@code tmp[0..95]} -- confirm against
 * PostingDecodingUtil.
 *
 * @param pdu decoding helper on which {@code splitInts} is invoked
 * @param tmp scratch buffer; entries 0..95 are read by the stitching loop
 * @param ints destination array; indices 96..127 are written by the stitching loop
 * @throws IOException declared for the underlying read
 */
static void decode24(PostingDecodingUtil pdu, int[] tmp, int[] ints) throws IOException {
  pdu.splitInts(96, ints, 8, 24, MASK32_24, tmp, 0, MASK32_8);
  int src = 0;
  // 32 outputs, each assembled from three consecutive tmp entries.
  for (int dst = 96; dst < 128; dst++) {
    final int high = tmp[src++] << 16;
    final int mid = tmp[src++] << 8;
    final int low = tmp[src++];
    ints[dst] = high | mid | low;
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

"""Code generation for ForDeltaUtil.java"""

MAX_SPECIALIZED_BITS_PER_VALUE = 24
MAX_SPECIALIZED_BITS_PER_VALUE = 16
OUTPUT_FILE = "ForDeltaUtil.java"
PRIMITIVE_SIZE = [8, 16, 32]
HEADER = """// This file has been automatically generated, DO NOT EDIT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

"""Code generation for ForUtil.java"""

MAX_SPECIALIZED_BITS_PER_VALUE = 24
MAX_SPECIALIZED_BITS_PER_VALUE = 16
OUTPUT_FILE = "ForUtil.java"
PRIMITIVE_SIZE = [8, 16, 32]
HEADER = """// This file has been automatically generated, DO NOT EDIT
Expand Down

0 comments on commit f6a9be4

Please sign in to comment.