diff --git a/packages/marshal/index.js b/packages/marshal/index.js index 3e9b3afc87..f8bc9ca22e 100644 --- a/packages/marshal/index.js +++ b/packages/marshal/index.js @@ -17,6 +17,7 @@ export { export { trivialComparator, + compareByCodePoints, assertRankSorted, compareRank, isRankSorted, diff --git a/packages/marshal/src/rankOrder.js b/packages/marshal/src/rankOrder.js index e11c9200f4..b3439f7fd7 100644 --- a/packages/marshal/src/rankOrder.js +++ b/packages/marshal/src/rankOrder.js @@ -46,9 +46,46 @@ const { entries, fromEntries, setPrototypeOf, is } = Object; */ const sameValueZero = (x, y) => x === y || is(x, y); +/** + * @param {any} left + * @param {any} right + * @returns {RankComparison} + */ export const trivialComparator = (left, right) => // eslint-disable-next-line no-nested-ternary, @endo/restrict-comparison-operands left < right ? -1 : left === right ? 0 : 1; +harden(trivialComparator); + +// Apparently eslint is confused about whether the function can ever exit +// without an explicit return. +// eslint-disable-next-line jsdoc/require-returns-check +/** + * @param {string} left + * @param {string} right + * @returns {RankComparison} + */ +export const compareByCodePoints = (left, right) => { + const leftIter = left[Symbol.iterator](); + const rightIter = right[Symbol.iterator](); + for (;;) { + const { value: leftChar } = leftIter.next(); + const { value: rightChar } = rightIter.next(); + if (leftChar === undefined && rightChar === undefined) { + return 0; + } else if (leftChar === undefined) { + // left is a prefix of right. + return -1; + } else if (rightChar === undefined) { + // right is a prefix of left. 
+ return 1; + } + const leftCodepoint = /** @type {number} */ (leftChar.codePointAt(0)); + const rightCodepoint = /** @type {number} */ (rightChar.codePointAt(0)); + if (leftCodepoint < rightCodepoint) return -1; + if (leftCodepoint > rightCodepoint) return 1; + } +}; +harden(compareByCodePoints); /** * @typedef {Record} PassStyleRanksRecord @@ -140,8 +177,7 @@ export const makeComparatorKit = (compareRemotables = (_x, _y) => 0) => { return 0; } case 'boolean': - case 'bigint': - case 'string': { + case 'bigint': { // Within each of these passStyles, the rank ordering agrees with // JavaScript's relational operators `<` and `>`. if (left < right) { @@ -151,6 +187,9 @@ export const makeComparatorKit = (compareRemotables = (_x, _y) => 0) => { return 1; } } + case 'string': { + return compareByCodePoints(left, right); + } case 'symbol': { return comparator( nameForPassableSymbol(left), diff --git a/packages/marshal/test/test-string-rank-order.js b/packages/marshal/test/test-string-rank-order.js new file mode 100644 index 0000000000..5cbee4599e --- /dev/null +++ b/packages/marshal/test/test-string-rank-order.js @@ -0,0 +1,36 @@ +import { test } from './prepare-test-env-ava.js'; + +import { compareRank } from '../src/rankOrder.js'; + +test('unicode code point order', t => { + // Test case from + // https://icu-project.org/docs/papers/utf16_code_point_order.html + const str0 = '\u{ff61}'; + const str3 = '\u{d800}\u{dc02}'; + + // str1 and str2 become impossible examples once we prohibit + // non-well-formed strings. + // See https://github.com/endojs/endo/pull/2002 + const str1 = '\u{d800}X'; + const str2 = '\u{d800}\u{ff61}'; + + // harden to ensure it is not sorted in place, just for sanity + const strs = harden([str0, str1, str2, str3]); + + /** + * @param {string} left + * @param {string} right + * @returns {import('../src/types.js').RankComparison} + */ + const nativeComp = (left, right) => + // eslint-disable-next-line no-nested-ternary + left < right ? 
-1 : left > right ? 1 : 0; + + const nativeSorted = strs.toSorted(nativeComp); + + t.deepEqual(nativeSorted, [str1, str3, str2, str0]); + + const rankSorted = strs.toSorted(compareRank); + + t.deepEqual(rankSorted, [str1, str2, str0, str3]); +}); diff --git a/packages/patterns/test/test-string-key-order.js b/packages/patterns/test/test-string-key-order.js new file mode 100644 index 0000000000..55c546711f --- /dev/null +++ b/packages/patterns/test/test-string-key-order.js @@ -0,0 +1,38 @@ +// modeled on test-string-rank-order.js + +import { test } from './prepare-test-env-ava.js'; + +import { compareKeys } from '../src/keys/compareKeys.js'; + +test('unicode code point order', t => { + // Test case from + // https://icu-project.org/docs/papers/utf16_code_point_order.html + const str0 = '\u{ff61}'; + const str3 = '\u{d800}\u{dc02}'; + + // str1 and str2 become impossible examples once we prohibit + // non-well-formed strings. + // See https://github.com/endojs/endo/pull/2002 + const str1 = '\u{d800}X'; + const str2 = '\u{d800}\u{ff61}'; + + // harden to ensure it is not sorted in place, just for sanity + const strs = harden([str0, str1, str2, str3]); + + /** + * @param {string} left + * @param {string} right + * @returns {import('../src/types.js').KeyComparison} + */ + const nativeComp = (left, right) => + // eslint-disable-next-line no-nested-ternary + left < right ? -1 : left > right ? 1 : 0; + + const nativeSorted = strs.toSorted(nativeComp); + + t.deepEqual(nativeSorted, [str1, str3, str2, str0]); + + const keySorted = strs.toSorted(compareKeys); + + t.deepEqual(keySorted, [str1, str2, str0, str3]); +});