Skip to content

Commit

Permalink
Internal Performance: adds performance improvement to encoded strings
Browse files Browse the repository at this point in the history
  • Loading branch information
fnarenji committed Dec 10, 2020
1 parent edf3e77 commit c6a284f
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 109 deletions.
32 changes: 10 additions & 22 deletions Microsoft.Azure.Cosmos/src/Json/JsonBinaryEncoding.Strings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,9 @@ public static bool TryEncodeCompressedString(
return false;
}

int firstSetBit = 128;
int lastSetBit = 0;
int charCount = 0;
BitArray valueCharSet = new BitArray(length: 128);
// Create a bit-set with all the ASCII characters of the string value
for (int index = 0; index < stringValue.Length; index++)
Expand All @@ -660,33 +663,18 @@ public static bool TryEncodeCompressedString(
return false;
}

valueCharSet.Set(charValue, true);
}

int firstSetBit = 0;
for (; (firstSetBit < valueCharSet.Length) && !valueCharSet[firstSetBit]; firstSetBit++)
{
}

int lastSetBit = valueCharSet.Length - 1;
for (; (lastSetBit > 0) && !valueCharSet[lastSetBit]; lastSetBit--)
{
}

int charCount = 0;
int firstBitSet = int.MaxValue;
int lastBitSet = int.MinValue;
for (int i = 0; i < valueCharSet.Length; i++)
{
if (valueCharSet[i])
if (!valueCharSet[charValue])
{
charCount++;
firstBitSet = Math.Min(firstBitSet, i);
lastBitSet = Math.Max(lastBitSet, i);

firstSetBit = Math.Min(charValue, firstSetBit);
lastSetBit = Math.Max(charValue, lastSetBit);
}

valueCharSet.Set(charValue, true);
}

int charRange = lastSetBit - firstSetBit + 1;
int charRange = (lastSetBit - firstSetBit) + 1;

// Attempt to encode the string as 4-bit packed values over a defined character set
if ((stringValue.Length <= 0xFF) && (charCount <= 16) && (stringValue.Length >= Min4BitCharSetStringLength))
Expand Down
157 changes: 70 additions & 87 deletions Microsoft.Azure.Cosmos/src/Json/JsonWriter.JsonBinaryWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ private enum RawValueType : byte

private readonly ReferenceStringDictionary sharedStringIndexes;

/// <summary>
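/// Buffer offsets of the reference-string type markers written so far; the string reference stored after each marker is fixed up when the outermost array / object is closed.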
/// </summary>
private readonly List<int> stringReferenceOffsets;

/// <summary>
/// Initializes a new instance of the JsonBinaryWriter class.
/// </summary>
Expand All @@ -240,13 +245,14 @@ public JsonBinaryWriter(
this.reservationSize = JsonBinaryEncoding.TypeMarkerLength + JsonBinaryEncoding.OneByteLength + (this.serializeCount ? JsonBinaryEncoding.OneByteCount : 0);
this.sharedStrings = new List<SharedStringValue>();
this.sharedStringIndexes = new ReferenceStringDictionary();
this.stringReferenceOffsets = new List<int>();

// Write the serialization format as the very first byte
byte binaryTypeMarker = (byte)JsonSerializationFormat.Binary;
this.binaryWriter.Write(binaryTypeMarker);

// Push on the outermost context
this.bufferedContexts.Push(new ArrayAndObjectInfo(this.CurrentLength));
this.bufferedContexts.Push(new ArrayAndObjectInfo(this.CurrentLength, stringStartIndex: 0, stringReferenceStartIndex: 0, valueCount: 0));
}

/// <inheritdoc />
Expand Down Expand Up @@ -431,10 +437,7 @@ private void WriterArrayOrObjectStart(bool isArray)
this.JsonObjectState.RegisterToken(isArray ? JsonTokenType.BeginArray : JsonTokenType.BeginObject);

// Save the start index
ArrayAndObjectInfo info = new ArrayAndObjectInfo(this.CurrentLength)
{
StringStartIndex = this.sharedStrings.Count
};
ArrayAndObjectInfo info = new ArrayAndObjectInfo(this.CurrentLength, this.sharedStrings.Count, this.stringReferenceOffsets.Count, valueCount: 0);
this.bufferedContexts.Push(info);

// Assume 1-byte value length; as such, we need to reserve up to 3 bytes (1 byte type marker, 1 byte length, 1 byte count).
Expand All @@ -459,6 +462,7 @@ private void WriteArrayOrObjectEnd(bool isArray)
int payloadLength = originalCursor - payloadIndex;
int count = (int)nestedContext.Count;
int stringStartIndex = (int)nestedContext.StringStartIndex;
int stringReferenceStartIndex = (int)nestedContext.StringReferenceStartIndex;

// Figure out what the typemarker and length should be and do any corrections needed
if (count == 0)
Expand All @@ -478,7 +482,7 @@ private void WriteArrayOrObjectEnd(bool isArray)
// Move the buffer back but leave one byte for the typemarker
Span<byte> buffer = this.binaryWriter.BufferAsSpan;
int bytesToWrite = JsonBinaryEncoding.TypeMarkerLength;
this.MoveBuffer(buffer, payloadIndex, payloadLength, typeMarkerIndex, bytesToWrite, stringStartIndex);
this.MoveBuffer(buffer, payloadIndex, payloadLength, typeMarkerIndex, bytesToWrite, stringStartIndex, stringReferenceStartIndex);

// Move the cursor back
this.binaryWriter.Position = typeMarkerIndex;
Expand Down Expand Up @@ -532,7 +536,7 @@ private void WriteArrayOrObjectEnd(bool isArray)
int bytesToWrite = JsonBinaryEncoding.TypeMarkerLength
+ JsonBinaryEncoding.TwoByteLength
+ (this.serializeCount ? JsonBinaryEncoding.TwoByteCount : 0);
this.MoveBuffer(buffer, payloadIndex, payloadLength, typeMarkerIndex, bytesToWrite, stringStartIndex);
this.MoveBuffer(buffer, payloadIndex, payloadLength, typeMarkerIndex, bytesToWrite, stringStartIndex, stringReferenceStartIndex);

// Move the cursor back
this.binaryWriter.Position = typeMarkerIndex;
Expand Down Expand Up @@ -571,7 +575,7 @@ private void WriteArrayOrObjectEnd(bool isArray)
int bytesToWrite = JsonBinaryEncoding.TypeMarkerLength
+ JsonBinaryEncoding.FourByteLength
+ (this.serializeCount ? JsonBinaryEncoding.FourByteCount : 0);
this.MoveBuffer(buffer, payloadIndex, payloadLength, typeMarkerIndex, bytesToWrite, stringStartIndex);
this.MoveBuffer(buffer, payloadIndex, payloadLength, typeMarkerIndex, bytesToWrite, stringStartIndex, stringReferenceStartIndex);

// Move the cursor back
this.binaryWriter.Position = typeMarkerIndex;
Expand All @@ -597,13 +601,20 @@ private void WriteArrayOrObjectEnd(bool isArray)
this.bufferedContexts.Peek().Count++;

// If we are closing the outermost array / object, we need to fix up reference string offsets
if (typeMarkerIndex == 1)
if (typeMarkerIndex == 1 && this.sharedStrings.Count > 0)
{
this.FixReferenceStringOffsets(this.binaryWriter.RawBuffer.Slice(start: 1));
this.FixReferenceStringOffsets(this.binaryWriter.BufferAsSpan);
}
}

private void MoveBuffer(Span<byte> buffer, int payloadIndex, int payloadLength, int typeMarkerIndex, int bytesToWrite, int stringStartIndex)
private void MoveBuffer(
Span<byte> buffer,
int payloadIndex,
int payloadLength,
int typeMarkerIndex,
int bytesToWrite,
int stringStartIndex,
int stringReferenceOffsetLow)
{
Span<byte> payload = buffer.Slice(payloadIndex, payloadLength);
int newPayloadIndex = typeMarkerIndex + bytesToWrite;
Expand All @@ -616,101 +627,69 @@ private void MoveBuffer(Span<byte> buffer, int payloadIndex, int payloadLength,
SharedStringValue sharedStringValue = this.sharedStrings[index];
this.sharedStrings[index] = new SharedStringValue(offset: sharedStringValue.Offset + delta, maxOffset: sharedStringValue.MaxOffset);
}
}

private void FixReferenceStringOffsets(Memory<byte> buffer)
{
if (this.sharedStrings.Count == 0)
for (int i = stringReferenceOffsetLow; i < this.stringReferenceOffsets.Count; ++i)
{
return;
this.stringReferenceOffsets[i] += delta;
}
}

byte typeMarker = buffer.Span[0];

JsonNodeType nodeType = JsonBinaryEncoding.NodeTypes.Lookup[typeMarker];
switch (nodeType)
private void FixReferenceStringOffsets(Span<byte> binaryWriterRawBuffer)
{
foreach (int stringReferenceOffset in this.stringReferenceOffsets)
{
case JsonNodeType.Null:
case JsonNodeType.False:
case JsonNodeType.True:
case JsonNodeType.Number64:
case JsonNodeType.Int8:
case JsonNodeType.Int16:
case JsonNodeType.Int32:
case JsonNodeType.Int64:
case JsonNodeType.UInt32:
case JsonNodeType.Float32:
case JsonNodeType.Float64:
case JsonNodeType.Binary:
case JsonNodeType.Guid:
// Do Nothing
break;
byte typeMarker = binaryWriterRawBuffer[stringReferenceOffset];

case JsonNodeType.String:
case JsonNodeType.FieldName:
JsonNodeType nodeType = JsonBinaryEncoding.NodeTypes.Lookup[typeMarker];
switch (nodeType)
{
case JsonNodeType.String:
case JsonNodeType.FieldName:
{
Memory<byte> offsetBuffer = buffer.Slice(start: 1);
Span<byte> offsetBuffer = binaryWriterRawBuffer.Slice(stringReferenceOffset + 1);
switch (typeMarker)
{
case JsonBinaryEncoding.TypeMarker.ReferenceString1ByteOffset:
{
byte stringIndex = JsonBinaryEncoding.GetFixedSizedValue<byte>(offsetBuffer.Span);
SharedStringValue sharedStringValue = this.sharedStrings[stringIndex];
JsonBinaryEncoding.SetFixedSizedValue<byte>(offsetBuffer.Span, (byte)sharedStringValue.Offset);
}
{
byte stringIndex = offsetBuffer[0];
SharedStringValue sharedStringValue = this.sharedStrings[stringIndex];
JsonBinaryEncoding.SetFixedSizedValue<byte>(offsetBuffer, (byte)sharedStringValue.Offset);
break;
}

case JsonBinaryEncoding.TypeMarker.ReferenceString2ByteOffset:
{
ushort stringIndex = JsonBinaryEncoding.GetFixedSizedValue<ushort>(offsetBuffer.Span);
SharedStringValue sharedStringValue = this.sharedStrings[stringIndex];
JsonBinaryEncoding.SetFixedSizedValue<ushort>(offsetBuffer.Span, (ushort)sharedStringValue.Offset);
}
{
ushort stringIndex = JsonBinaryEncoding.GetFixedSizedValue<ushort>(offsetBuffer);
SharedStringValue sharedStringValue = this.sharedStrings[stringIndex];
JsonBinaryEncoding.SetFixedSizedValue<ushort>(offsetBuffer, (ushort)sharedStringValue.Offset);
break;
}

case JsonBinaryEncoding.TypeMarker.ReferenceString3ByteOffset:
{
JsonBinaryEncoding.UInt24 stringIndex = JsonBinaryEncoding.GetFixedSizedValue<JsonBinaryEncoding.UInt24>(offsetBuffer.Span);
SharedStringValue sharedStringValue = this.sharedStrings[stringIndex];
JsonBinaryEncoding.SetFixedSizedValue<JsonBinaryEncoding.UInt24>(offsetBuffer.Span, (JsonBinaryEncoding.UInt24)sharedStringValue.Offset);
}
{
JsonBinaryEncoding.UInt24 stringIndex =
JsonBinaryEncoding.GetFixedSizedValue<JsonBinaryEncoding.UInt24>(offsetBuffer);
SharedStringValue sharedStringValue = this.sharedStrings[stringIndex];
JsonBinaryEncoding.SetFixedSizedValue<JsonBinaryEncoding.UInt24>(
offsetBuffer,
(JsonBinaryEncoding.UInt24)sharedStringValue.Offset);
break;
}

case JsonBinaryEncoding.TypeMarker.ReferenceString4ByteOffset:
{
int stringIndex = JsonBinaryEncoding.GetFixedSizedValue<int>(offsetBuffer.Span);
SharedStringValue sharedStringValue = this.sharedStrings[stringIndex];
JsonBinaryEncoding.SetFixedSizedValue<int>(offsetBuffer.Span, (int)sharedStringValue.Offset);
}
break;

default:
// Do Nothing
{
int stringIndex = JsonBinaryEncoding.GetFixedSizedValue<int>(offsetBuffer);
SharedStringValue sharedStringValue = this.sharedStrings[stringIndex];
JsonBinaryEncoding.SetFixedSizedValue<int>(offsetBuffer, (int)sharedStringValue.Offset);
break;
}
}
}
break;

case JsonNodeType.Array:
foreach (Memory<byte> arrayItem in JsonBinaryEncoding.Enumerator.GetMutableArrayItems(buffer))
{
this.FixReferenceStringOffsets(arrayItem);
}

break;

case JsonNodeType.Object:
foreach (JsonBinaryEncoding.Enumerator.MutableObjectProperty mutableObjectProperty in JsonBinaryEncoding.Enumerator.GetMutableObjectProperties(buffer))
{
this.FixReferenceStringOffsets(mutableObjectProperty.Name);
this.FixReferenceStringOffsets(mutableObjectProperty.Value);
break;
}

break;

case JsonNodeType.Unknown:
default:
throw new InvalidOperationException($"Unknown {nameof(nodeType)}: {nodeType}.");
default:
throw new InvalidOperationException($"Unknown {nameof(nodeType)}: {nodeType}.");
}
}
}

Expand Down Expand Up @@ -841,6 +820,7 @@ private bool TryRegisterStringValue(Utf8Span utf8Span)
}

SharedStringValue sharedString = this.sharedStrings[(int)hashAndIndex.index];
this.stringReferenceOffsets.Add(this.binaryWriter.Position);

if (sharedString.MaxOffset <= byte.MaxValue)
{
Expand Down Expand Up @@ -1125,18 +1105,21 @@ private void WriteRawStringValue(RawValueType rawValueType, ReadOnlyMemory<byte>

private sealed class ArrayAndObjectInfo
{
public ArrayAndObjectInfo(long offset)
public ArrayAndObjectInfo(long offset, int stringStartIndex, long stringReferenceStartIndex, int valueCount)
{
this.Offset = offset;
this.Count = 0;
this.StringStartIndex = 0;
this.Count = valueCount;
this.StringStartIndex = stringStartIndex;
this.StringReferenceStartIndex = stringReferenceStartIndex;
}

public long Offset { get; }

public long Count { get; set; }

public long StringStartIndex { get; set; }
public long StringStartIndex { get; }

public long StringReferenceStartIndex { get; }
}

private sealed class JsonBinaryMemoryWriter : JsonMemoryWriter
Expand Down

0 comments on commit c6a284f

Please sign in to comment.