-
Notifications
You must be signed in to change notification settings - Fork 72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(marshal)!: compare strings by codepoint #2008
Draft
erights
wants to merge
2
commits into
master
Choose a base branch
from
markm-rank-strings-by-codepoint
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
/* eslint-disable no-bitwise, @endo/restrict-comparison-operands */ | ||
import { Fail, q } from '@endo/errors'; | ||
|
||
import { | ||
makeEncodePassable, | ||
makeDecodePassable, | ||
} from '../src/encodePassable.js'; | ||
import { compareRank, makeComparatorKit } from '../src/rankOrder.js'; | ||
|
||
const buffers = { | ||
__proto__: null, | ||
r: [], | ||
'?': [], | ||
'!': [], | ||
}; | ||
const resetBuffers = () => { | ||
buffers.r = []; | ||
buffers['?'] = []; | ||
buffers['!'] = []; | ||
}; | ||
const cursors = { | ||
__proto__: null, | ||
r: 0, | ||
'?': 0, | ||
'!': 0, | ||
}; | ||
const resetCursors = () => { | ||
cursors.r = 0; | ||
cursors['?'] = 0; | ||
cursors['!'] = 0; | ||
}; | ||
|
||
const encodeThing = (prefix, r) => { | ||
buffers[prefix].push(r); | ||
// With this encoding, all things with the same prefix have the same rank | ||
return prefix; | ||
}; | ||
|
||
const decodeThing = (prefix, e) => { | ||
prefix === e || | ||
Fail`expected encoding ${q(e)} to simply be the prefix ${q(prefix)}`; | ||
(cursors[prefix] >= 0 && cursors[prefix] < buffers[prefix].length) || | ||
Fail`while decoding ${q(e)}, expected cursors[${q(prefix)}], i.e., ${q( | ||
cursors[prefix], | ||
)} <= ${q(buffers[prefix].length)}`; | ||
const thing = buffers[prefix][cursors[prefix]]; | ||
cursors[prefix] += 1; | ||
return thing; | ||
}; | ||
|
||
const encodePassableInternal = makeEncodePassable({ | ||
encodeRemotable: r => encodeThing('r', r), | ||
encodePromise: p => encodeThing('?', p), | ||
encodeError: er => encodeThing('!', er), | ||
}); | ||
|
||
export const encodePassableInternal2 = makeEncodePassable({ | ||
encodeRemotable: r => encodeThing('r', r), | ||
encodePromise: p => encodeThing('?', p), | ||
encodeError: er => encodeThing('!', er), | ||
format: 'compactOrdered', | ||
}); | ||
|
||
export const encodePassable = passable => { | ||
resetBuffers(); | ||
return encodePassableInternal(passable); | ||
}; | ||
|
||
export const encodePassable2 = passable => { | ||
resetBuffers(); | ||
return encodePassableInternal2(passable); | ||
}; | ||
export const decodePassableInternal = makeDecodePassable({ | ||
decodeRemotable: e => decodeThing('r', e), | ||
decodePromise: e => decodeThing('?', e), | ||
decodeError: e => decodeThing('!', e), | ||
}); | ||
|
||
export const decodePassable = encoded => { | ||
resetCursors(); | ||
return decodePassableInternal(encoded); | ||
}; | ||
|
||
const compareRemotables = (x, y) => | ||
compareRank(encodeThing('r', x), encodeThing('r', y)); | ||
|
||
export const { comparator: compareFull } = makeComparatorKit(compareRemotables); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import test from '@endo/ses-ava/prepare-endo.js'; | ||
|
||
import { compareRank } from '../src/rankOrder.js'; | ||
import { encodePassable } from './encodePassable-for-testing.js'; | ||
|
||
/** | ||
* Essentially a ponyfill for Array.prototype.toSorted, for use before | ||
* we can always rely on the platform to provide it. | ||
* | ||
* @param {string[]} strings | ||
* @param {( | ||
* left: string, | ||
* right: string | ||
* ) => import('../src/types.js').RankComparison} comp | ||
* @returns {string[]} | ||
*/ | ||
const sorted = (strings, comp) => [...strings].sort(comp); | ||
|
||
test('unicode code point order', t => { | ||
// Test case from | ||
// https://icu-project.org/docs/papers/utf16_code_point_order.html | ||
const str0 = '\u{ff61}'; | ||
const str3 = '\u{d800}\u{dc02}'; | ||
|
||
// str1 and str2 become impossible examples once we prohibit | ||
// non - well - formed strings. | ||
// See https://github.com/endojs/endo/pull/2002 | ||
const str1 = '\u{d800}X'; | ||
const str2 = '\u{d800}\u{ff61}'; | ||
|
||
// harden to ensure it is not sorted in place, just for sanity | ||
const strs = harden([str0, str1, str2, str3]); | ||
|
||
/** | ||
* @param {string} left | ||
* @param {string} right | ||
* @returns {import('../src/types.js').RankComparison} | ||
*/ | ||
const nativeComp = (left, right) => | ||
// eslint-disable-next-line no-nested-ternary | ||
left < right ? -1 : left > right ? 1 : 0; | ||
|
||
const nativeSorted = sorted(strs, nativeComp); | ||
|
||
t.deepEqual(nativeSorted, [str1, str3, str2, str0]); | ||
|
||
const rankSorted = sorted(strs, compareRank); | ||
|
||
t.deepEqual(rankSorted, [str1, str2, str0, str3]); | ||
|
||
const nativeEncComp = (left, right) => | ||
nativeComp(encodePassable(left), encodePassable(right)); | ||
|
||
const nativeEncSorted = sorted(strs, nativeEncComp); | ||
|
||
t.deepEqual(nativeEncSorted, nativeSorted); | ||
|
||
const rankEncComp = (left, right) => | ||
compareRank(encodePassable(left), encodePassable(right)); | ||
|
||
const rankEncSorted = sorted(strs, rankEncComp); | ||
|
||
t.deepEqual(rankEncSorted, rankSorted); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// modeled on test-string-rank-order.js | ||
import test from '@endo/ses-ava/prepare-endo.js'; | ||
|
||
import { compareKeys } from '../src/keys/compareKeys.js'; | ||
|
||
/** | ||
* Essentially a ponyfill for Array.prototype.toSorted, for use before | ||
* we can always rely on the platform to provide it. | ||
* | ||
* @param {string[]} strings | ||
* @param {( | ||
* left: string, | ||
* right: string | ||
* ) => import('@endo/marshal').RankComparison} comp | ||
* @returns {string[]} | ||
*/ | ||
const sorted = (strings, comp) => [...strings].sort(comp); | ||
|
||
test('unicode code point order', t => { | ||
// Test case from | ||
// https://icu-project.org/docs/papers/utf16_code_point_order.html | ||
const str0 = '\u{ff61}'; | ||
const str3 = '\u{d800}\u{dc02}'; | ||
|
||
// str1 and str2 become impossible examples once we prohibit | ||
// non - well - formed strings. | ||
// See https://github.com/endojs/endo/pull/2002 | ||
const str1 = '\u{d800}X'; | ||
const str2 = '\u{d800}\u{ff61}'; | ||
|
||
// harden to ensure it is not sorted in place, just for sanity | ||
const strs = harden([str0, str1, str2, str3]); | ||
|
||
/** | ||
* @param {string} left | ||
* @param {string} right | ||
* @returns {import('@endo/marshal').RankComparison} | ||
*/ | ||
const nativeComp = (left, right) => | ||
// eslint-disable-next-line no-nested-ternary | ||
left < right ? -1 : left > right ? 1 : 0; | ||
|
||
const nativeSorted = sorted(strs, nativeComp); | ||
|
||
t.deepEqual(nativeSorted, [str1, str3, str2, str0]); | ||
|
||
// @ts-expect-error We know that for strings, `compareKeys` never returns | ||
// NaN because it never judges strings to be incomparable. Thus, the | ||
// KeyComparison it returns happens to also be a RankComparison we can | ||
// sort with. | ||
const keySorted = sorted(strs, compareKeys); | ||
|
||
t.deepEqual(keySorted, [str1, str2, str0, str3]); | ||
}); |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gibson042 is this true? It was true for my small test case, which proves very little. Will the same property also be true for
compactOrdered
? For either, does restricting these strings to well-ordered have any effect on whether their encoding is order preserving?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is true now, but I think that's a mistake...
recordNames
and any similar function that.sort()
s an array of strings in marshal or a related package should probably be updated to.sort(compareByCodePoints)
so the encoding of Copy{Record,Set,Bag,Map}s and their own comparison is consistent with that of their constituent strings.Which unfortunately complicates adoption if we have existing use of any such strings.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, and bad news!
Grepping for
.sort()
specifically with nothing between the parens, I see 96 occurrences in agoric-sdk and 26 in endo. Some may not be or contain strings. But still, fixing all that do will be disruptive. And the longer we wait, the more disruptive it'll be.I'm putting this back into Draft until we decide what our plan is. Attn @ivanlei
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any practical way to scan a recent snapshot of our chain and somehow see how many persistent strings are
?
How hard would it be?
Attn @mhofman
NOT URGENT.