diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cc620f6..e56df48e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - _...Add new stuff here..._ ### 🐞 Bug fixes +- The `index-of`, `length`, and `slice` expression operators count a UTF-16 surrogate pair as a single character. ([#779](https://github.com/maplibre/maplibre-style-spec/pull/779)) - _...Add new stuff here..._ ## 20.3.0 diff --git a/src/expression/definitions/index_of.ts b/src/expression/definitions/index_of.ts index 844a4573..a8202ad0 100644 --- a/src/expression/definitions/index_of.ts +++ b/src/expression/definitions/index_of.ts @@ -60,16 +60,24 @@ class IndexOf implements Expression { throw new RuntimeError(`Expected first argument to be of type boolean, string, number or null, but found ${toString(typeOf(needle))} instead.`); } - if (!isValidNativeType(haystack, ['string', 'array'])) { - throw new RuntimeError(`Expected second argument to be of type array or string, but found ${toString(typeOf(haystack))} instead.`); + let fromIndex; + if (this.fromIndex) { + fromIndex = (this.fromIndex.evaluate(ctx) as number); } - if (this.fromIndex) { - const fromIndex = (this.fromIndex.evaluate(ctx) as number); + if (isValidNativeType(haystack, ['string'])) { + const rawIndex = haystack.indexOf(needle, fromIndex); + if (rawIndex === -1) { + return -1; + } else { + // The index may be affected by surrogate pairs, so get the length of the preceding substring. 
+ return [...haystack.slice(0, rawIndex)].length; + } + } else if (isValidNativeType(haystack, ['array'])) { return haystack.indexOf(needle, fromIndex); + } else { + throw new RuntimeError(`Expected second argument to be of type array or string, but found ${toString(typeOf(haystack))} instead.`); } - - return haystack.indexOf(needle); } eachChild(fn: (_: Expression) => void) { diff --git a/src/expression/definitions/length.ts b/src/expression/definitions/length.ts index 2579bd05..ba5ec56c 100644 --- a/src/expression/definitions/length.ts +++ b/src/expression/definitions/length.ts @@ -33,7 +33,8 @@ class Length implements Expression { evaluate(ctx: EvaluationContext) { const input = this.input.evaluate(ctx); if (typeof input === 'string') { - return input.length; + // The length may be affected by surrogate pairs. + return [...input].length; } else if (Array.isArray(input)) { return input.length; } else { diff --git a/src/expression/definitions/slice.ts b/src/expression/definitions/slice.ts index d550941d..94f92371 100644 --- a/src/expression/definitions/slice.ts +++ b/src/expression/definitions/slice.ts @@ -56,16 +56,19 @@ class Slice implements Expression { const input = (this.input.evaluate(ctx) as any); const beginIndex = (this.beginIndex.evaluate(ctx) as number); - if (!isValidNativeType(input, ['string', 'array'])) { - throw new RuntimeError(`Expected first argument to be of type array or string, but found ${toString(typeOf(input))} instead.`); + let endIndex; + if (this.endIndex) { + endIndex = (this.endIndex.evaluate(ctx) as number); } - if (this.endIndex) { - const endIndex = (this.endIndex.evaluate(ctx) as number); + if (isValidNativeType(input, ['string'])) { + // Indices may be affected by surrogate pairs. 
+ return [...input].slice(beginIndex, endIndex).join(''); + } else if (isValidNativeType(input, ['array'])) { return input.slice(beginIndex, endIndex); + } else { + throw new RuntimeError(`Expected first argument to be of type array or string, but found ${toString(typeOf(input))} instead.`); } - - return input.slice(beginIndex); } eachChild(fn: (_: Expression) => void) { diff --git a/src/expression/expression.test.ts b/src/expression/expression.test.ts index d45d9bbc..605d2ab6 100644 --- a/src/expression/expression.test.ts +++ b/src/expression/expression.test.ts @@ -388,3 +388,244 @@ describe('Distance expression', () => { }); }); }); + +describe('index-of expression', () => { + test('requires a needle', () => { + const response = createExpression(['index-of']); + expect(response.result).toBe('error'); + }); + test('requires a haystack', () => { + const response = createExpression(['index-of', 'a']); + expect(response.result).toBe('error'); + }); + test('rejects a fourth argument', () => { + const response = createExpression(['index-of', 'a', 'abc', 1, 8]); + expect(response.result).toBe('error'); + }); + test('requires a primitive as the needle', () => { + const response = createExpression(['index-of', ['literal', ['a']], ['a', 'b', 'c']]); + expect(response.result).toBe('error'); + }); + test('requires a string or array as the haystack', () => { + const response = createExpression(['index-of', 't', true]); + expect(response.result).toBe('error'); + }); + test('finds an empty substring in an empty string', () => { + const response = createExpression(['index-of', '', '']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(0); + }); + test('finds an empty substring in a non-empty string', () => { + const response = createExpression(['index-of', '', 'abc']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(0); + }); + test('cannot find 
a non-empty substring in an empty string', () => { + const response = createExpression(['index-of', 'abc', '']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(-1); + }); + test('finds a non-empty substring in a non-empty string', () => { + const response = createExpression(['index-of', 'b', 'abc']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1); + }); + test('only finds the first occurrence in a string', () => { + const response = createExpression(['index-of', 'b', 'abbc']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1); + }); + test('starts looking for the substring at a positive start index', () => { + const response = createExpression(['index-of', 'a', 'abc', 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(-1); + }); + test('starts looking for the substring at a negative start index', () => { + const response = createExpression(['index-of', 'c', 'abc', -1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(2); + }); + test('counts a non-ASCII character as a single character', () => { + const response = createExpression(['index-of', '镇', '市镇']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1); + }); + test('counts a surrogate pair as a single character', () => { + const response = createExpression(['index-of', '市镇', '丐𦨭市镇']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(2); + }); + test('cannot find an element in an empty array', () => { + const response = createExpression(['index-of', 1, ['literal', []]]); + expect(response.result).toBe('success'); + expect((response.value 
as StyleExpression)?.evaluate({zoom: 20})).toBe(-1); + }); + test('finds an element in a non-empty array', () => { + const response = createExpression(['index-of', 2, ['literal', [1, 2, 3]]]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1); + }); + test('only finds the first occurrence in an array', () => { + const response = createExpression(['index-of', 2, ['literal', [1, 2, 2, 3]]]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(1); + }); + test('starts looking for the element at a positive start index', () => { + const response = createExpression(['index-of', 1, ['literal', [1, 2, 3]], 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(-1); + }); + test('starts looking for the element at a negative start index', () => { + const response = createExpression(['index-of', 3, ['literal', [1, 2, 3]], -1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(2); + }); +}); + +describe('length expression', () => { + test('requires an argument', () => { + const response = createExpression(['length']); + expect(response.result).toBe('error'); + }); + test('requires a string or array as the argument', () => { + const response = createExpression(['length', true]); + expect(response.result).toBe('error'); + }); + test('rejects a second argument', () => { + const response = createExpression(['length', 'abc', 'def']); + expect(response.result).toBe('error'); + }); + test('measures an empty string', () => { + const response = createExpression(['length', '']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(0); + }); + test('measures a non-empty string', () => { + const response = createExpression(['length', 'abc']); + 
expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(3); + }); + test('counts a non-ASCII character as a single character', () => { + const response = createExpression(['length', '市镇']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(2); + }); + test('counts a surrogate pair as a single character', () => { + const response = createExpression(['length', '丐𦨭市镇']); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(4); + }); + test('measures an empty array', () => { + const response = createExpression(['length', ['literal', []]]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(0); + }); + test('measures a non-empty array', () => { + const response = createExpression(['length', ['literal', [1, 2, 3]]]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(3); + }); +}); + +describe('slice expression', () => { + test('requires an input argument', () => { + const response = createExpression(['slice']); + expect(response.result).toBe('error'); + }); + test('requires a start index argument', () => { + const response = createExpression(['slice', 'abc']); + expect(response.result).toBe('error'); + }); + test('rejects a fourth argument', () => { + const response = createExpression(['slice', 'abc', 0, 1, 8]); + expect(response.result).toBe('error'); + }); + test('requires a string or array as the input argument', () => { + const response = createExpression(['slice', true, 0]); + expect(response.result).toBe('error'); + }); + test('requires a number as the start index argument', () => { + const response = createExpression(['slice', 'abc', true]); + expect(response.result).toBe('error'); + }); + test('slices an empty string', () => { + const response = 
createExpression(['slice', '', 0]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(''); + }); + test('slices a string starting at the beginning', () => { + const response = createExpression(['slice', 'abc', 0]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('abc'); + }); + test('slices a string starting at the middle', () => { + const response = createExpression(['slice', 'abc', 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('bc'); + }); + test('slices a string starting at the end', () => { + const response = createExpression(['slice', 'abc', 3]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(''); + }); + test('slices a string backwards from the end', () => { + const response = createExpression(['slice', 'abc', -2]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('bc'); + }); + test('slices a string by a zero-length range', () => { + const response = createExpression(['slice', 'abc', 1, 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(''); + }); + test('slices a string by a negative-length range', () => { + const response = createExpression(['slice', 'abc', 2, 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe(''); + }); + test('avoids splitting a non-ASCII character', () => { + const response = createExpression(['slice', '市镇', 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('镇'); + }); + test('avoids splitting a surrogate pair', () => { + const response = createExpression(['slice', '丐𦨭市镇', 2]); + 
expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toBe('市镇'); + }); + test('slices an empty array', () => { + const response = createExpression(['slice', ['literal', []], 0]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([]); + }); + test('slices an array starting at the beginning', () => { + const response = createExpression(['slice', ['literal', [1, 2, 3]], 0]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([1, 2, 3]); + }); + test('slices an array starting at the middle', () => { + const response = createExpression(['slice', ['literal', [1, 2, 3]], 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([2, 3]); + }); + test('slices an array starting at the end', () => { + const response = createExpression(['slice', ['literal', [1, 2, 3]], 3]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([]); + }); + test('slices an array backwards from the end', () => { + const response = createExpression(['slice', ['literal', [1, 2, 3]], -2]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([2, 3]); + }); + test('slices an array by a zero-length range', () => { + const response = createExpression(['slice', ['literal', [1, 2, 3]], 1, 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([]); + }); + test('slices an array by a negative-length range', () => { + const response = createExpression(['slice', ['literal', [1, 2, 3]], 2, 1]); + expect(response.result).toBe('success'); + expect((response.value as StyleExpression)?.evaluate({zoom: 20})).toEqual([]); + }); +}); diff --git 
a/src/reference/v8.json b/src/reference/v8.json index 889de1b7..c9d35dc1 100644 --- a/src/reference/v8.json +++ b/src/reference/v8.json @@ -2826,7 +2826,7 @@ } }, "index-of": { - "doc": "Returns the first position at which an item can be found in an array or a substring can be found in a string, or `-1` if the input cannot be found. Accepts an optional index from where to begin the search.", + "doc": "Returns the first position at which an item can be found in an array or a substring can be found in a string, or `-1` if the input cannot be found. Accepts an optional index from where to begin the search. In a string, a UTF-16 surrogate pair counts as a single position.", "example": { "syntax": { "method": ["value", "value", "number?"], @@ -2844,7 +2844,7 @@ } }, "slice": { - "doc": "Returns an item from an array or a substring from a string from a specified start index, or between a start index and an end index if set. The return value is inclusive of the start index but not of the end index.", + "doc": "Returns a subarray from an array or a substring from a string from a specified start index, or between a start index and an end index if set. The return value is inclusive of the start index but not of the end index. In a string, a UTF-16 surrogate pair counts as a single position.", "example": { "syntax": { "method": ["value", "number", "number?"], @@ -3380,7 +3380,7 @@ } }, "length": { - "doc": "Gets the length of an array or string.", + "doc": "Gets the length of an array or string. In a string, a UTF-16 surrogate pair counts as a single character.", "example": { "syntax": { "method": ["array"],