Skip to content

Commit

Permalink
Handle Overflow: Using MSB position to calculate bit width. (#6)
Browse files Browse the repository at this point in the history
* Handle Overflow: Using MSB position to calculate bit width
  • Loading branch information
ee-naveen authored Aug 19, 2023
1 parent 23cc2d2 commit 2dcf362
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 6 deletions.
35 changes: 35 additions & 0 deletions src/Parquet.Test/Encodings/DeltaBinaryPackedEncodingTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,40 @@ public void EncodeAndDecodeInt64_1_100000() {

Assert.Equal(input, des);
}

[Fact]
public void EncodeAndDecodeInt32_Random_Overflow() {
    // Fixed seed: the values span the whole int range (so consecutive deltas
    // can overflow) while staying reproducible from run to run.
    const int total = 1000;
    var rng = new Random(0);
    int[] input = new int[total];
    for(int idx = 0; idx < total; idx++) {
        input[idx] = rng.Next(int.MinValue, int.MaxValue);
    }

    using var ms = new MemoryStream();
    DeltaBinaryPackedEncoder.Encode(input, 0, input.Length, ms);
    byte[] encoded = ms.ToArray();

    // Only the decoded values are checked; the return value and the out
    // argument of Decode are intentionally discarded.
    int[] des = new int[input.Length];
    _ = DeltaBinaryPackedEncoder.Decode(encoded, des, 0, input.Length, out int _);

    Assert.Equal(input, des);
}

[Fact]
public void EncodeAndDecodeInt64_Random_Overflow() {
    // Fixed seed: each value is built from 8 random bytes, so the inputs cover
    // the whole long range while staying reproducible from run to run.
    const int total = 1000;
    var rng = new Random(0);
    long[] input = new long[total];
    // NextBytes overwrites every byte, so one scratch buffer can be reused.
    byte[] scratch = new byte[8];
    for(int idx = 0; idx < total; idx++) {
        rng.NextBytes(scratch);
        input[idx] = BitConverter.ToInt64(scratch, 0);
    }

    using var ms = new MemoryStream();
    DeltaBinaryPackedEncoder.Encode(input, 0, input.Length, ms);
    byte[] encoded = ms.ToArray();

    // Only the decoded values are checked; the return value and the out
    // argument of Decode are intentionally discarded.
    long[] des = new long[input.Length];
    _ = DeltaBinaryPackedEncoder.Decode(encoded, des, 0, input.Length, out int _);

    Assert.Equal(input, des);
}
}
}
8 changes: 4 additions & 4 deletions src/Parquet/Encodings/DeltaBinaryPackedEncoder.Variations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ private static void FlushIntBlock(Span<int> block, int minDelta,
if(count < 0)
break;

int max = block.Slice(offset, count).Max();
bitWidths[bwi] = (byte)max.GetBitWidth();
int bitwidth = block.Slice(offset, count).CalculateBitWidth();
bitWidths[bwi] = (byte)bitwidth;
}

// write bit widths
Expand Down Expand Up @@ -182,8 +182,8 @@ private static void FlushLongBlock(Span<long> block, long minDelta,
if(count < 0)
break;

long max = block.Slice(offset, count).Max();
bitWidths[bwi] = (byte)max.GetBitWidth();
int bitwidth = block.Slice(offset, count).CalculateBitWidth();
bitWidths[bwi] = (byte)bitwidth;
}

// write bit widths
Expand Down
4 changes: 2 additions & 2 deletions src/Parquet/Encodings/DeltaBinaryPackedEncoder.Variations.tt
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ namespace Parquet.Encodings {
if(count < 0)
break;

<#=nt#> max = block.Slice(offset, count).Max();
bitWidths[bwi] = (byte)max.GetBitWidth();
int bitwidth = block.Slice(offset, count).CalculateBitWidth();
bitWidths[bwi] = (byte)bitwidth;
}

// write bit widths
Expand Down
19 changes: 19 additions & 0 deletions src/Parquet/Encodings/DeltaBinaryPackedEncoder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,24 @@ public static int Decode(Span<byte> s, Array dest, int destOffset, int valueCoun

throw new NotSupportedException($"element type {elementType} is not supported");
}


/// <summary>
/// Calculates the 1-based position of the most significant set bit across
/// all elements of the span.
/// </summary>
/// <param name="span">Values to inspect.</param>
/// <returns>
/// 32 minus the leading-zero count of the bitwise OR of all elements.
/// </returns>
static int CalculateBitWidth(this Span<int> span) {
    // OR-ing the elements together leaves a bit set wherever any element has it set,
    // so the highest set bit of the result is the highest set bit of the whole span.
    int combined = 0;
    foreach(int value in span) {
        combined |= value;
    }

    int leadingZeros = combined.NumberOfLeadingZerosInt();
    return 32 - leadingZeros;
}

/// <summary>
/// Calculates the 1-based position of the most significant set bit across
/// all elements of the span.
/// </summary>
/// <param name="span">Values to inspect.</param>
/// <returns>
/// 64 minus the leading-zero count of the bitwise OR of all elements.
/// </returns>
static int CalculateBitWidth(this Span<long> span) {
    // OR-ing the elements together leaves a bit set wherever any element has it set,
    // so the highest set bit of the result is the highest set bit of the whole span.
    long combined = 0;
    foreach(long value in span) {
        combined |= value;
    }

    int leadingZeros = combined.NumberOfLeadingZerosLong();
    return 64 - leadingZeros;
}
}
}
19 changes: 19 additions & 0 deletions src/Parquet/Extensions/EncodingExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -97,5 +97,24 @@ public static void WriteULEB128(this Stream destination, ulong value) {

#endregion

#region Leading Zeros
/// <summary>
/// Counts the zero bits that precede the highest set bit of a 32-bit value.
/// </summary>
/// <param name="num">Value to inspect.</param>
/// <returns>
/// 32 when <paramref name="num"/> is zero, 0 when it is negative (top bit set),
/// otherwise the number of zero bits above the highest set bit.
/// </returns>
public static int NumberOfLeadingZerosInt(this int num) {
    if(num == 0)
        return 32;
    if(num < 0)
        return 0;

    // Binary search for the highest set bit: whenever the value still reaches
    // bit `shift` or above, drop the low `shift` bits and credit them to the count.
    int zeros = 31;
    for(int shift = 16; shift >= 2; shift >>= 1) {
        if(num >= 1 << shift) {
            zeros -= shift;
            num >>>= shift;
        }
    }

    // num is now in 1..3; its bit 1 accounts for the last position.
    return zeros - (num >>> 1);
}

/// <summary>
/// Counts the zero bits that precede the highest set bit of a 64-bit value.
/// </summary>
/// <param name="num">Value to inspect; handled as a raw 64-bit pattern.</param>
/// <returns>
/// 64 when <paramref name="num"/> is zero, 0 when it is negative (top bit set),
/// otherwise the number of zero bits above the highest set bit.
/// </returns>
public static int NumberOfLeadingZerosLong(this long num) {
    // Both narrowing casts are deliberate truncations to a 32-bit half. The
    // unchecked block keeps them from throwing OverflowException when the
    // assembly is compiled with arithmetic overflow checking enabled.
    unchecked {
        int high = (int)(num >>> 32);
        if(high != 0)
            return high.NumberOfLeadingZerosInt();

        // Upper half is all zeros: 32 leading zeros plus those of the lower half.
        int low = (int)num;
        return 32 + low.NumberOfLeadingZerosInt();
    }
}
#endregion

}
}

0 comments on commit 2dcf362

Please sign in to comment.