Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Utf8Array] GetLengthメソッドを追加 #82

Merged
finphie merged 2 commits into main from enhancement/GetLength
Nov 6, 2021
Merged

Conversation

finphie
Copy link
Owner

@finphie finphie commented Nov 5, 2021

SharpLab

using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

/// <summary>
/// A read-only wrapper around a byte array that holds UTF-8 encoded text.
/// </summary>
public readonly partial struct Utf8Array
{
    readonly byte[] _value;

    /// <summary>
    /// Counts the UTF-8 sequences in the underlying array, walking it with a
    /// native-sized unsigned offset (<c>nuint</c>).
    /// </summary>
    /// <returns>
    /// The number of sequences found, or 0 when the struct is default-initialized
    /// (its backing array is <see langword="null"/>).
    /// </returns>
    /// <remarks>
    /// A byte for which <see cref="UnicodeUtility.GetUtf8SequenceLength(byte)"/> reports 0
    /// (a continuation byte or an invalid lead byte) is counted as one unit and skipped,
    /// so malformed input cannot stall the loop.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int GetLength1()
    {
        var bytes = _value;

        // default(Utf8Array) leaves the backing array null; treat it as empty.
        if (bytes is null)
        {
            return 0;
        }

        var count = 0;
        nuint i = 0;

        // The data reference does not change while iterating, so fetch it once.
        ref var valueStart = ref MemoryMarshal.GetArrayDataReference(bytes);

        while ((int)i < bytes.Length)
        {
            var value = Unsafe.AddByteOffset(ref valueStart, (nint)i);
            var step = (uint)UnicodeUtility.GetUtf8SequenceLength(value);

            // A step of 0 would never advance the offset; always consume at least one byte.
            i += step == 0 ? 1u : step;
            count++;
        }

        return count;
    }

    /// <summary>
    /// Counts the UTF-8 sequences in the underlying array, walking it with a
    /// 32-bit signed index (<c>int</c>).
    /// </summary>
    /// <returns>
    /// The number of sequences found, or 0 when the struct is default-initialized
    /// (its backing array is <see langword="null"/>).
    /// </returns>
    /// <remarks>
    /// A byte for which <see cref="UnicodeUtility.GetUtf8SequenceLength(byte)"/> reports 0
    /// (a continuation byte or an invalid lead byte) is counted as one unit and skipped,
    /// so malformed input cannot stall the loop.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public int GetLength_int()
    {
        var bytes = _value;

        // default(Utf8Array) leaves the backing array null; treat it as empty.
        if (bytes is null)
        {
            return 0;
        }

        var count = 0;
        var i = 0;

        // The data reference does not change while iterating, so fetch it once.
        ref var valueStart = ref MemoryMarshal.GetArrayDataReference(bytes);

        while (i < bytes.Length)
        {
            // Zero-extend the index before widening so no sign extension is needed.
            var value = Unsafe.AddByteOffset(ref valueStart, (nint)(uint)i);
            var step = UnicodeUtility.GetUtf8SequenceLength(value);

            // A step of 0 would never advance the index; always consume at least one byte.
            i += step == 0 ? 1 : step;
            count++;
        }

        return count;
    }

    /// <summary>
    /// Returns a reference to the first element of the underlying array without any checks.
    /// </summary>
    /// <returns>A reference to element 0 of the backing array.</returns>
    /// <remarks>
    /// No null or length check is performed here; callers must make sure the backing
    /// array is non-null and must not read past its length.
    /// </remarks>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ref byte DangerousGetReference() => ref MemoryMarshal.GetArrayDataReference(_value);
}

/// <summary>
/// Helpers for inspecting UTF-8 encoded bytes.
/// </summary>
public static class UnicodeUtility
{
    /// <summary>
    /// Looks up the total length, in bytes, of the UTF-8 sequence that starts with
    /// <paramref name="value"/>.
    /// </summary>
    /// <param name="value">The first byte of a UTF-8 sequence.</param>
    /// <returns>
    /// 1 for 0x00-0x7F, 2 for 0xC2-0xDF, 3 for 0xE0-0xEF, 4 for 0xF0-0xF4,
    /// and 0 for every other byte (0x80-0xC1 and 0xF5-0xFF).
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int GetUtf8SequenceLength(byte value)
    {
        // 256-entry table indexed directly by the byte value.
        ReadOnlySpan<byte> sequenceLengths = new byte[]
        {
            // 0x00-0x7F: single-byte sequences.
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

            // 0x80-0xBF: continuation bytes, never the start of a sequence.
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

            // 0xC0-0xC1: rejected (0); 0xC2-0xDF: two-byte sequences.
            0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

            // 0xE0-0xEF: three-byte sequences.
            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,

            // 0xF0-0xF4: four-byte sequences; 0xF5-0xFF: rejected (0).
            4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        };

        // The index is a byte, so it always falls inside the 256-entry table.
        return Unsafe.Add(ref MemoryMarshal.GetReference(sequenceLengths), (nint)value);
    }
}
Utf8Array.GetLength1()
    L0000: xor eax, eax
    L0002: xor edx, edx
    L0004: mov rcx, [rcx]
    L0007: cmp dword ptr [rcx+8], 0
    L000b: jle short L0035
    L000d: mov r8, rcx
    L0010: cmp [r8], r8d
    L0013: add r8, 0x10
    L0017: movzx r8d, byte ptr [r8+rdx]
    L001c: mov r9, 0x19def260a44
    L0026: movzx r8d, byte ptr [r8+r9]
    L002b: add rdx, r8
    L002e: inc eax
    L0030: cmp [rcx+8], edx
    L0033: jg short L000d
    L0035: ret

Utf8Array.GetLength_int()
    L0000: xor eax, eax
    L0002: xor edx, edx
    L0004: mov rcx, [rcx]
    L0007: cmp dword ptr [rcx+8], 0
    L000b: jle short L0038
    L000d: mov r8, rcx
    L0010: cmp [r8], r8d
    L0013: add r8, 0x10
    L0017: mov r9d, edx
    L001a: movzx r8d, byte ptr [r8+r9]
    L001f: mov r9, 0x19def260a44
    L0029: movzx r8d, byte ptr [r8+r9]
    L002e: add edx, r8d
    L0031: inc eax
    L0033: cmp [rcx+8], edx
    L0036: jg short L000d
    L0038: ret

@github-actions github-actions bot added the enhancement New feature or request label Nov 5, 2021
@finphie finphie merged commit f8b2268 into main Nov 6, 2021
@finphie finphie deleted the enhancement/GetLength branch November 6, 2021 00:00
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant