diff --git a/migrations/string_normalization/migration.go b/migrations/string_normalization/migration.go index 0c1c74d267..512ca00a2f 100644 --- a/migrations/string_normalization/migration.go +++ b/migrations/string_normalization/migration.go @@ -19,6 +19,8 @@ package string_normalization import ( + "golang.org/x/text/unicode/norm" + "github.com/onflow/cadence/migrations" "github.com/onflow/cadence/runtime/interpreter" "github.com/onflow/cadence/runtime/sema" @@ -41,13 +43,30 @@ func (StringNormalizingMigration) Migrate( _ interpreter.StorageMapKey, value interpreter.Value, _ *interpreter.Interpreter, -) (interpreter.Value, error) { +) ( + interpreter.Value, + error, +) { + + // Normalize strings and characters to NFC. + // If the value is already in NFC, skip the migration. + switch value := value.(type) { case *interpreter.StringValue: - return interpreter.NewUnmeteredStringValue(value.Str), nil + unnormalizedStr := value.UnnormalizedStr + normalizedStr := norm.NFC.String(unnormalizedStr) + if normalizedStr == unnormalizedStr { + return nil, nil + } + return interpreter.NewStringValue_Unsafe(normalizedStr, unnormalizedStr), nil //nolint:staticcheck case interpreter.CharacterValue: - return interpreter.NewUnmeteredCharacterValue(value.Str), nil + unnormalizedStr := value.UnnormalizedStr + normalizedStr := norm.NFC.String(unnormalizedStr) + if normalizedStr == unnormalizedStr { + return nil, nil + } + return interpreter.NewCharacterValue_Unsafe(normalizedStr, unnormalizedStr), nil //nolint:staticcheck } return nil, nil diff --git a/runtime/interpreter/value.go b/runtime/interpreter/value.go index adbe758703..720983a612 100644 --- a/runtime/interpreter/value.go +++ b/runtime/interpreter/value.go @@ -817,10 +817,21 @@ type CharacterValue struct { UnnormalizedStr string } -func NewUnmeteredCharacterValue(r string) CharacterValue { +func NewUnmeteredCharacterValue(str string) CharacterValue { return CharacterValue{ - Str: norm.NFC.String(r), - UnnormalizedStr: r, + 
Str: norm.NFC.String(str), + UnnormalizedStr: str, + } +} + +// Deprecated: NewCharacterValue_Unsafe creates a new character value +// from the given normalized and unnormalized string. +// NOTE: this function is unsafe, as it does not normalize the string. +// It should only be used for e.g. migration purposes. +func NewCharacterValue_Unsafe(normalizedStr, unnormalizedStr string) CharacterValue { + return CharacterValue{ + Str: normalizedStr, + UnnormalizedStr: unnormalizedStr, } } @@ -1045,6 +1056,19 @@ func NewUnmeteredStringValue(str string) *StringValue { } } +// Deprecated: NewStringValue_Unsafe creates a new string value +// from the given normalized and unnormalized string. +// NOTE: this function is unsafe, as it does not normalize the string. +// It should only be used for e.g. migration purposes. +func NewStringValue_Unsafe(normalizedStr, unnormalizedStr string) *StringValue { + return &StringValue{ + Str: normalizedStr, + UnnormalizedStr: unnormalizedStr, + // a negative value indicates the length has not been initialized, see Length() + length: -1, + } +} + func NewStringValue( memoryGauge common.MemoryGauge, memoryUsage common.MemoryUsage,