From a85616fdf4c1cf4104ef4921cdade4fb040e4068 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 23 May 2023 16:19:08 -0400 Subject: [PATCH 01/14] fix(marshal): Add a smaller and more efficient passable encoding Fixes #1349 --- packages/marshal/src/encodePassable.js | 309 ++++++++++++++----- packages/marshal/test/test-encodePassable.js | 29 ++ 2 files changed, 265 insertions(+), 73 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index fe24a2b0c2..6f3ed482d9 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -224,11 +224,140 @@ const decodeBigInt = encoded => { return n; }; -// `'\u0000'` is the terminator after elements. -// `'\u0001'` is the backslash-like escape character, for -// escaping both of these characters. +// Escape all characters from U+0000 NULL to U+001F INFORMATION SEPARATOR ONE +// like `!` to avoid JSON.stringify expansion to +// `\uHHHH`, and specially escape U+0020 SPACE (the array element terminator) +// as `!_` and U+0021 EXCLAMATION MARK (the escape prefix) as `!|`. +// Relative lexicographic ordering is preserved by this mapping of any character +// at or before `!` in the contiguous range [0x00..0x21] to a respective +// character in [0x21..0x40, 0x5F, 0x7C] preceded by `!` (which is itself in the +// replaced range). +// Similarly, escape `^` as `_@` and `_` as `__` because `^` indicates the start +// of an encoded array. +const stringEscapes = Array(0x22) + .fill(undefined) + .map((_, cp) => { + switch (String.fromCharCode(cp)) { + case ' ': + return '!_'; + case '!': + return '!|'; + default: + return `!${String.fromCharCode(cp + 0x21)}`; + } + }); +stringEscapes['^'.charCodeAt(0)] = '_@'; +stringEscapes['_'.charCodeAt(0)] = '__'; + +const encodeStringWithEscapes = str => + `s${str.replaceAll(/[\0-!^_]/g, ch => stringEscapes[ch.charCodeAt(0)])}`; +const decodeStringWithEscapes = encoded => { + return encoded + .slice(1) + .replaceAll(/([!_])(.|\n)?/g, (esc, prefix, suffix) => { + switch (esc) { + case '!_': + return ' '; + case '!|': + return '!'; + case '_@': + return '^'; + case '__': + return '_'; + default: { + const ch = /** @type {string} */ (suffix); + (prefix === '!' && ch >= '!' && ch <= '@') || + Fail`invalid string escape: ${q(esc)}`; + return String.fromCharCode(ch.charCodeAt(0) - 0x21); + } + } + }); +}; + +const encodeStringWithoutEscapes = str => `s${str}`; +const decodeStringWithoutEscapes = encoded => encoded.slice(1); + +/** + * Encodes an array into a sequence of encoded elements, each terminated by a + * space (which is part of the escaped range in encoded strings). + * + * @param {unknown[]} array + * @param {(p: Passable) => string} encodePassable + * @returns {string} + */ +const encodeArrayWithoutEscapes = (array, encodePassable) => { + const chars = ['^']; + for (const element of array) { + const enc = encodePassable(element); + chars.push(enc, ' '); + } + return chars.join(''); +}; + +/** + * @param {string} encoded + * @param {(encoded: string) => Passable} decodePassable + * @param {number} [skip] + * @returns {Array} + */ +const decodeArrayWithoutEscapes = (encoded, decodePassable, skip = 0) => { + const elements = []; + let depth = 0; + // Scan encoded rather than its tail to avoid slow `substring` in XS. + // https://github.com/endojs/endo/issues/1984 + let nextIndex = skip + 1; + let currentElementStart = skip + 1; + for (const { 0: ch, index: i } of encoded.matchAll(/[\^ ]/g)) { + const index = /** @type {number} */ (i); + if (index <= skip) { + if (index === skip) { + ch === '^' || Fail`Encoded array expected: ${encoded.slice(skip)}`; + } + } else if (ch === '^') { + // This is the start of a nested array. + // TODO: Since the syntax of nested arrays must be validated as part of + // decoding the outer one, consider decoding them here into a shared cache + // rather than discarding information about their contents until the later + // decodePassable. + depth += 1; + } else { + // This is a terminated element. + if (index === nextIndex) { + // A terminator after `[` or an another terminator indicates that an array is done. + depth -= 1; + depth >= 0 || + // prettier-ignore + Fail`unexpected array element terminator: ${encoded.slice(skip, index + 2)}`; + } + if (depth === 0) { + // We have a complete element of the topmost array. + elements.push( + decodePassable(encoded.slice(currentElementStart, index)), + ); + currentElementStart = index + 1; + } + } + // Advance the index. + nextIndex = index + 1; + } + depth === 0 || Fail`unterminated array: ${encoded.slice(skip)}`; + nextIndex === encoded.length || + Fail`unterminated array element: ${encoded.slice(currentElementStart)}`; + return harden(elements); +}; -const encodeArray = (array, encodePassable) => { +/** + * Performs the original array encoding, which escapes all array elements rather + * than just strings (`\u0000` as the element terminator and `\u0001` as the + * escape prefix for `\u0000` or `\u0001`). + * This necessitated an undesirable amount of iteration and expansion; see + * https://github.com/endojs/endo/pull/1260#discussion_r960369826 + * + * @param {unknown[]} array + * @param {(p: Passable) => string} encodePassable + * @returns {string} + */ +const encodeArrayWithEscapes = (array, encodePassable) => { const chars = ['[']; for (const element of array) { const enc = encodePassable(element); @@ -249,7 +378,7 @@ const encodeArray = (array, encodePassable) => { * @param {number} [skip] * @returns {Array} */ -const decodeArray = (encoded, decodePassable, skip = 0) => { +const decodeArrayWithEscapes = (encoded, decodePassable, skip = 0) => { const elements = []; const elemChars = []; // Use a string iterator to avoid slow indexed access in XS. @@ -287,13 +416,13 @@ const decodeArray = (encoded, decodePassable, skip = 0) => { return harden(elements); }; -const encodeRecord = (record, encodePassable) => { +const encodeRecord = (record, encodeArray, encodePassable) => { const names = recordNames(record); const values = recordValues(record, names); return `(${encodeArray(harden([names, values]), encodePassable)}`; }; -const decodeRecord = (encoded, decodePassable) => { +const decodeRecord = (encoded, decodeArray, decodePassable) => { assert(encoded.charAt(0) === '('); // Skip the "(" inside `decodeArray` to avoid slow `substring` in XS. // https://github.com/endojs/endo/issues/1984 @@ -312,10 +441,10 @@ const decodeRecord = (encoded, decodePassable) => { return record; }; -const encodeTagged = (tagged, encodePassable) => +const encodeTagged = (tagged, encodeArray, encodePassable) => `:${encodeArray(harden([getTag(tagged), tagged.payload]), encodePassable)}`; -const decodeTagged = (encoded, decodePassable) => { +const decodeTagged = (encoded, decodeArray, decodePassable) => { assert(encoded.charAt(0) === ':'); // Skip the ":" inside `decodeArray` to avoid slow `substring` in XS. // https://github.com/endojs/endo/issues/1984 @@ -341,6 +470,7 @@ const decodeTagged = (encoded, decodePassable) => { * error: Error, * encodeRecur: (p: Passable) => string, * ) => string} [encodeError] + * @property {boolean} [xxx] */ /** @@ -352,11 +482,17 @@ export const makeEncodePassable = (encodeOptions = {}) => { encodeRemotable = (rem, _) => Fail`remotable unexpected: ${rem}`, encodePromise = (prom, _) => Fail`promise unexpected: ${prom}`, encodeError = (err, _) => Fail`error unexpected: ${err}`, + xxx = false, } = encodeOptions; - const encodePassable = passable => { + const encodeString = xxx + ? encodeStringWithEscapes + : encodeStringWithoutEscapes; + const encodeArray = xxx ? encodeArrayWithoutEscapes : encodeArrayWithEscapes; + + const innerEncode = passable => { if (isErrorLike(passable)) { - return encodeError(passable, encodePassable); + return encodeError(passable, innerEncode); } const passStyle = passStyleOf(passable); switch (passStyle) { @@ -370,7 +506,7 @@ export const makeEncodePassable = (encodeOptions = {}) => { return encodeBinary64(passable); } case 'string': { - return `s${passable}`; + return encodeString(passable); } case 'boolean': { return `b${passable}`; @@ -379,40 +515,45 @@ export const makeEncodePassable = (encodeOptions = {}) => { return encodeBigInt(passable); } case 'remotable': { - const result = encodeRemotable(passable, encodePassable); + const result = encodeRemotable(passable, innerEncode); result.charAt(0) === 'r' || Fail`internal: Remotable encoding must start with "r": ${result}`; return result; } case 'error': { - const result = encodeError(passable, encodePassable); + const result = encodeError(passable, innerEncode); result.charAt(0) === '!' || Fail`internal: Error encoding must start with "!": ${result}`; return result; } case 'promise': { - const result = encodePromise(passable, encodePassable); + const result = encodePromise(passable, innerEncode); result.charAt(0) === '?' || Fail`internal: Promise encoding must start with "?": ${result}`; return result; } case 'symbol': { - return `y${nameForPassableSymbol(passable)}`; + const encName = encodeString(nameForPassableSymbol(passable)); + return `y${encName.slice(1)}`; } case 'copyArray': { - return encodeArray(passable, encodePassable); + return encodeArray(passable, innerEncode); } case 'copyRecord': { - return encodeRecord(passable, encodePassable); + return encodeRecord(passable, encodeArray, innerEncode); } case 'tagged': { - return encodeTagged(passable, encodePassable); + return encodeTagged(passable, encodeArray, innerEncode); } default: { throw Fail`a ${q(passStyle)} cannot be used as a collection passable`; } } }; + const encodePassable = xxx + ? // A leading "#" indicates the v2 encoding (with escaping in strings rather than arrays). + passable => `#${innerEncode(passable)}` + : innerEncode; return harden(encodePassable); }; harden(makeEncodePassable); @@ -444,57 +585,77 @@ export const makeDecodePassable = (decodeOptions = {}) => { decodeError = (err, _) => Fail`error unexpected: ${err}`, } = decodeOptions; - const decodePassable = encoded => { - switch (encoded.charAt(0)) { - case 'v': { - return null; - } - case 'z': { - return undefined; - } - case 'f': { - return decodeBinary64(encoded); - } - case 's': { - return encoded.substring(1); - } - case 'b': { - if (encoded === 'btrue') { - return true; - } else if (encoded === 'bfalse') { - return false; + const makeInnerDecode = (decodeString, decodeArray) => { + const innerDecode = encoded => { + switch (encoded.charAt(0)) { + case 'v': { + return null; + } + case 'z': { + return undefined; + } + case 'f': { + return decodeBinary64(encoded); + } + case 's': { + return decodeString(encoded); + } + case 'b': { + if (encoded === 'btrue') { + return true; + } else if (encoded === 'bfalse') { + return false; + } + throw Fail`expected encoded boolean to be "btrue" or "bfalse": ${encoded}`; + } + case 'n': + case 'p': { + return decodeBigInt(encoded); + } + case 'r': { + return decodeRemotable(encoded, innerDecode); + } + case '?': { + return decodePromise(encoded, innerDecode); + } + case '!': { + return decodeError(encoded, innerDecode); + } + case 'y': { + const name = decodeString(`s${encoded.slice(1)}`); + return passableSymbolForName(name); + } + case '[': + case '^': { + return decodeArray(encoded, innerDecode); + } + case '(': { + return decodeRecord(encoded, decodeArray, innerDecode); + } + case ':': { + return decodeTagged(encoded, decodeArray, innerDecode); + } + default: { + throw Fail`invalid database key: ${encoded}`; } - throw Fail`expected encoded boolean to be "btrue" or "bfalse": ${encoded}`; - } - case 'n': - case 'p': { - return decodeBigInt(encoded); - } - case 'r': { - return decodeRemotable(encoded, decodePassable); - } - case '?': { - return decodePromise(encoded, decodePassable); - } - case '!': { - return decodeError(encoded, decodePassable); - } - case 'y': { - return passableSymbolForName(encoded.substring(1)); - } - case '[': { - return decodeArray(encoded, decodePassable); - } - case '(': { - return decodeRecord(encoded, decodePassable); - } - case ':': { - return decodeTagged(encoded, decodePassable); - } - default: { - throw Fail`invalid database key: ${encoded}`; } + }; + return innerDecode; + }; + const decodePassable = encoded => { + // A leading "#" indicates the v2 encoding (with escaping in strings rather than arrays). + if (encoded.startsWith('#')) { + const innerDecode = makeInnerDecode( + decodeStringWithEscapes, + decodeArrayWithoutEscapes, + ); + return innerDecode(encoded.slice(1)); } + const innerDecode = makeInnerDecode( + decodeStringWithoutEscapes, + decodeArrayWithEscapes, + ); + return innerDecode(encoded); }; return harden(decodePassable); }; @@ -508,11 +669,13 @@ harden(isEncodedRemotable); /** * @type {Record} * The single prefix characters to be used for each PassStyle category. - * `bigint` is a two character string because each of those characters - * individually is a valid bigint prefix. `n` for "negative" and `p` for - * "positive". The ordering of these prefixes is the same as the - * rankOrdering of their respective PassStyles. This table is imported by - * rankOrder.js for this purpose. + * `bigint` is a two-character string because each of those characters + * individually is a valid bigint prefix (`n` for "negative" and `p` for + * "positive"), and copyArray is a two-character string because one encoding + * prefixes arrays with `[` while the other uses `^` (which is prohibited from + * appearing in an encoded string). + * The ordering of these prefixes is the same as the rankOrdering of their + * respective PassStyles, and rankOrder.js imports the table for this purpose. * * In addition, `|` is the remotable->ordinal mapping prefix: * This is not used in covers but it is @@ -525,7 +688,7 @@ export const passStylePrefixes = { copyRecord: '(', tagged: ':', promise: '?', - copyArray: '[', + copyArray: '[^', boolean: 'b', number: 'f', bigint: 'np', diff --git a/packages/marshal/test/test-encodePassable.js b/packages/marshal/test/test-encodePassable.js index 72495b701f..41117ea601 100644 --- a/packages/marshal/test/test-encodePassable.js +++ b/packages/marshal/test/test-encodePassable.js @@ -66,11 +66,21 @@ const encodePassableInternal = makeEncodePassable({ encodePromise: p => encodeThing('?', p), encodeError: er => encodeThing('!', er), }); +const encodePassableInternal2 = makeEncodePassable({ + encodeRemotable: r => encodeThing('r', r), + encodePromise: p => encodeThing('?', p), + encodeError: er => encodeThing('!', er), + xxx: true, +}); const encodePassable = passable => { resetBuffers(); return encodePassableInternal(passable); }; +const encodePassable2 = passable => { + resetBuffers(); + return encodePassableInternal2(passable); +}; const decodePassableInternal = makeDecodePassable({ decodeRemotable: e => decodeThing('r', e), @@ -123,7 +133,13 @@ const goldenPairs = harden([ test('golden round trips', t => { for (const [k, e] of goldenPairs) { t.is(encodePassable(k), e, 'does k encode as expected'); + t.is(encodePassable2(k), `#${e}`, 'does k small-encode as expected'); t.is(decodePassable(e), k, 'does the key round trip through the encoding'); + t.is( + decodePassable(`#${e}`), + k, + 'does the small-encoded key round trip through the encoding', + ); } // Not round trips t.is(encodePassable(-0), 'f8000000000000000'); @@ -165,6 +181,19 @@ test('Passables round-trip', async t => { }), ); }); +// TODO: Implement via macro +// https://github.com/avajs/ava/blob/main/docs/01-writing-tests.md#reusing-test-logic-through-macros +test('Small-encoded passables round-trip', async t => { + await fc.assert( + fc.property(arbPassable, n => { + const en = encodePassable2(n); + const rt = decodePassable(en); + const er = encodePassable2(rt); + t.is(en, er); + t.is(compareFull(n, rt), 0); + }), + ); +}); test('BigInt encoding comparison corresponds with numeric comparison', async t => { await fc.assert( From d90dfe2f4c7ba89f7b7e9b91087e63838cf7a11e Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 23 May 2023 17:32:32 -0400 Subject: [PATCH 02/14] test(marshal): Make a macro for encodePassable round-trip tests --- packages/marshal/test/test-encodePassable.js | 21 +++++--------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/packages/marshal/test/test-encodePassable.js b/packages/marshal/test/test-encodePassable.js index 41117ea601..1b1c14f064 100644 --- a/packages/marshal/test/test-encodePassable.js +++ b/packages/marshal/test/test-encodePassable.js @@ -170,30 +170,19 @@ const orderInvariants = (x, y) => { } }; -test('Passables round-trip', async t => { +const testRoundTrip = test.macro(async (t, encode) => { await fc.assert( fc.property(arbPassable, n => { - const en = encodePassable(n); + const en = encode(n); const rt = decodePassable(en); - const er = encodePassable(rt); - t.is(en, er); - t.is(compareFull(n, rt), 0); - }), - ); -}); -// TODO: Implement via macro -// https://github.com/avajs/ava/blob/main/docs/01-writing-tests.md#reusing-test-logic-through-macros -test('Small-encoded passables round-trip', async t => { - await fc.assert( - fc.property(arbPassable, n => { - const en = encodePassable2(n); - const rt = decodePassable(en); - const er = encodePassable2(rt); + const er = encode(rt); t.is(en, er); t.is(compareFull(n, rt), 0); }), ); }); +test('original encoding round-trips', testRoundTrip, encodePassable); +test('small encoding round-trips', testRoundTrip, encodePassable2); test('BigInt encoding comparison corresponds with numeric comparison', async t => { await fc.assert( From 961c7c38a10f0c29c1e02d3e62989336159c5b23 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 23 May 2023 17:34:07 -0400 Subject: [PATCH 03/14] refactor(marshal): Use String#replace rather than replaceAll Fixes failures on Node.js v14 --- packages/marshal/src/encodePassable.js | 38 ++++++++++++-------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index 6f3ed482d9..8b1ca13bb5 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -250,28 +250,26 @@ stringEscapes['^'.charCodeAt(0)] = '_@'; stringEscapes['_'.charCodeAt(0)] = '__'; const encodeStringWithEscapes = str => - `s${str.replaceAll(/[\0-!^_]/g, ch => stringEscapes[ch.charCodeAt(0)])}`; + `s${str.replace(/[\0-!^_]/g, ch => stringEscapes[ch.charCodeAt(0)])}`; const decodeStringWithEscapes = encoded => { - return encoded - .slice(1) - .replaceAll(/([!_])(.|\n)?/g, (esc, prefix, suffix) => { - switch (esc) { - case '!_': - return ' '; - case '!|': - return '!'; - case '_@': - return '^'; - case '__': - return '_'; - default: { - const ch = /** @type {string} */ (suffix); - (prefix === '!' && ch >= '!' && ch <= '@') || - Fail`invalid string escape: ${q(esc)}`; - return String.fromCharCode(ch.charCodeAt(0) - 0x21); - } + return encoded.slice(1).replace(/([!_])(.|\n)?/g, (esc, prefix, suffix) => { + switch (esc) { + case '!_': + return ' '; + case '!|': + return '!'; + case '_@': + return '^'; + case '__': + return '_'; + default: { + const ch = /** @type {string} */ (suffix); + (prefix === '!' && ch >= '!' && ch <= '@') || + Fail`invalid string escape: ${q(esc)}`; + return String.fromCharCode(ch.charCodeAt(0) - 0x21); } - }); + } + }); }; const encodeStringWithoutEscapes = str => `s${str}`; From 52dbda60877b38b490aca340599fcd57da27cf54 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Sat, 26 Aug 2023 13:06:36 -0400 Subject: [PATCH 04/14] chore(marshal): Improve comments --- packages/marshal/src/encodePassable.js | 37 +++++++++++++++++--------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index 8b1ca13bb5..7d6a745f8b 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -224,16 +224,24 @@ const decodeBigInt = encoded => { return n; }; -// Escape all characters from U+0000 NULL to U+001F INFORMATION SEPARATOR ONE -// like `!` to avoid JSON.stringify expansion to -// `\uHHHH`, and specially escape U+0020 SPACE (the array element terminator) -// as `!_` and U+0021 EXCLAMATION MARK (the escape prefix) as `!|`. -// Relative lexicographic ordering is preserved by this mapping of any character -// at or before `!` in the contiguous range [0x00..0x21] to a respective -// character in [0x21..0x40, 0x5F, 0x7C] preceded by `!` (which is itself in the -// replaced range). -// Similarly, escape `^` as `_@` and `_` as `__` because `^` indicates the start -// of an encoded array. +/** + * A sparse array for which every present index maps a code point in the ASCII + * range to a corresponding escape sequence. + * + * Escapes all characters from U+0000 NULL to U+001F INFORMATION SEPARATOR ONE + * like `!` to avoid JSON.stringify expansion as + * `\uHHHH`, and specially escapes U+0020 SPACE (the array element terminator) + * as `!_` and U+0021 EXCLAMATION MARK (the escape prefix) as `!|` (both chosen + * for visual approximation). + * Relative lexicographic ordering is preserved by this mapping of any character + * at or before `!` in the contiguous range [0x00..0x21] to a respective + * character in [0x21..0x40, 0x5F, 0x7C] preceded by `!` (which is itself in the + * replaced range). + * Similarly, escapes `^` as `_@` and `_` as `__` because `^` indicates the + * start of an encoded array. + * + * @type {Array} + */ const stringEscapes = Array(0x22) .fill(undefined) .map((_, cp) => { @@ -264,7 +272,9 @@ const decodeStringWithEscapes = encoded => { return '_'; default: { const ch = /** @type {string} */ (suffix); - (prefix === '!' && ch >= '!' && ch <= '@') || + // The range of valid escapes is [(0x00+0x21)..(0x1F+0x21)], i.e. + // [0x21..0x40] (U+0021 EXCLAMATION MARK to U+0040 COMMERCIAL AT). + (prefix === '!' && suffix !== undefined && ch >= '!' && ch <= '@') || Fail`invalid string escape: ${q(esc)}`; return String.fromCharCode(ch.charCodeAt(0) - 0x21); } @@ -531,6 +541,8 @@ export const makeEncodePassable = (encodeOptions = {}) => { return result; } case 'symbol': { + // Strings and symbols share encoding logic. + // Encode the name as a string, then replace the `s` prefix. const encName = encodeString(nameForPassableSymbol(passable)); return `y${encName.slice(1)}`; } @@ -620,7 +632,8 @@ export const makeDecodePassable = (decodeOptions = {}) => { return decodeError(encoded, innerDecode); } case 'y': { - const name = decodeString(`s${encoded.slice(1)}`); + // Strings and symbols share decoding logic. + const name = decodeString(encoded); return passableSymbolForName(name); } case '[': From 242d90599b7a619a869555042f3f6a5660b2c6d8 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 29 Aug 2023 14:37:35 -0400 Subject: [PATCH 05/14] refactor(marshal): Change new-encoding prefix symbol from "#" to "~" This provides better differention w.r.t. smallcaps, cf. https://github.com/endojs/endo/pull/1594#discussion_r1285263503 --- packages/marshal/src/encodePassable.js | 8 ++++---- packages/marshal/test/test-encodePassable.js | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index 7d6a745f8b..de22ddf845 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -561,8 +561,8 @@ export const makeEncodePassable = (encodeOptions = {}) => { } }; const encodePassable = xxx - ? // A leading "#" indicates the v2 encoding (with escaping in strings rather than arrays). - passable => `#${innerEncode(passable)}` + ? // A leading "~" indicates the v2 encoding (with escaping in strings rather than arrays). + passable => `~${innerEncode(passable)}` : innerEncode; return harden(encodePassable); }; @@ -654,8 +654,8 @@ export const makeDecodePassable = (decodeOptions = {}) => { return innerDecode; }; const decodePassable = encoded => { - // A leading "#" indicates the v2 encoding (with escaping in strings rather than arrays). - if (encoded.startsWith('#')) { + // A leading "~" indicates the v2 encoding (with escaping in strings rather than arrays). + if (encoded.startsWith('~')) { const innerDecode = makeInnerDecode( decodeStringWithEscapes, decodeArrayWithoutEscapes, diff --git a/packages/marshal/test/test-encodePassable.js b/packages/marshal/test/test-encodePassable.js index 1b1c14f064..5da9a2ea2a 100644 --- a/packages/marshal/test/test-encodePassable.js +++ b/packages/marshal/test/test-encodePassable.js @@ -133,10 +133,10 @@ const goldenPairs = harden([ test('golden round trips', t => { for (const [k, e] of goldenPairs) { t.is(encodePassable(k), e, 'does k encode as expected'); - t.is(encodePassable2(k), `#${e}`, 'does k small-encode as expected'); + t.is(encodePassable2(k), `~${e}`, 'does k small-encode as expected'); t.is(decodePassable(e), k, 'does the key round trip through the encoding'); t.is( - decodePassable(`#${e}`), + decodePassable(`~${e}`), k, 'does the small-encoded key round trip through the encoding', ); From 4ed2c15c3987b6395521f3ec433f57dad0e95387 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 29 Aug 2023 15:12:37 -0400 Subject: [PATCH 06/14] chore(marshal): Consistently assert behavior of makeEncodePassable remotable/promise/error callbacks --- packages/marshal/src/encodePassable.js | 38 +++++++++++++++++++++----- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index de22ddf845..dd49dd4eea 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -464,6 +464,21 @@ const decodeTagged = (encoded, decodeArray, decodePassable) => { return makeTagged(tag, payload); }; +const assertEncodedRemotable = encoding => { + encoding.charAt(0) === 'r' || + Fail`internal: Remotable encoding must start with "r": ${encoding}`; +}; + +const assertEncodedPromise = encoding => { + encoding.charAt(0) === '?' || + Fail`internal: Promise encoding must start with "?": ${encoding}`; +}; + +const assertEncodedError = encoding => { + encoding.charAt(0) === '!' || + Fail`internal: Error encoding must start with "!": ${encoding}`; +}; + /** * @typedef {object} EncodeOptions * @property {( @@ -500,7 +515,19 @@ export const makeEncodePassable = (encodeOptions = {}) => { const innerEncode = passable => { if (isErrorLike(passable)) { - return encodeError(passable, innerEncode); + // We pull out this special case to accommodate errors that are not + // valid Passables. For example, because they're not frozen. + // The special case can only ever apply at the root, and therefore + // outside the recursion, since an error could only be deeper in + // a passable structure if it were passable. + // + // We pull out this special case because, for these errors, we're much + // more interested in reporting whatever diagnostic information they + // carry than we are about reporting problems encountered in reporting + // this information. + const result = encodeError(passable, innerEncode); + assertEncodedError(result); + return result; } const passStyle = passStyleOf(passable); switch (passStyle) { @@ -524,20 +551,17 @@ export const makeEncodePassable = (encodeOptions = {}) => { } case 'remotable': { const result = encodeRemotable(passable, innerEncode); - result.charAt(0) === 'r' || - Fail`internal: Remotable encoding must start with "r": ${result}`; + assertEncodedRemotable(result); return result; } case 'error': { const result = encodeError(passable, innerEncode); - result.charAt(0) === '!' || - Fail`internal: Error encoding must start with "!": ${result}`; + assertEncodedError(result); return result; } case 'promise': { const result = encodePromise(passable, innerEncode); - result.charAt(0) === '?' || - Fail`internal: Promise encoding must start with "?": ${result}`; + assertEncodedPromise(result); return result; } case 'symbol': { From 820932a886036348c8da85afe775c644b82d401d Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Thu, 7 Sep 2023 17:22:22 -0400 Subject: [PATCH 07/14] chore(marshal): Name the encodePassable formats ("legacyOrdered" and "compactOrdered") https://github.com/endojs/endo/pull/1594#discussion_r1202971340 --- packages/marshal/src/encodePassable.js | 85 +++++++++++++------- packages/marshal/test/test-encodePassable.js | 20 ++++- 2 files changed, 75 insertions(+), 30 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index dd49dd4eea..05a4bbc4a2 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -257,9 +257,20 @@ const stringEscapes = Array(0x22) stringEscapes['^'.charCodeAt(0)] = '_@'; stringEscapes['_'.charCodeAt(0)] = '__'; -const encodeStringWithEscapes = str => +/** + * Encodes a string with escape sequences for use in the "compactOrdered" format. + * + * @type {(str: string) => string} + */ +const encodeCompactString = str => `s${str.replace(/[\0-!^_]/g, ch => stringEscapes[ch.charCodeAt(0)])}`; -const decodeStringWithEscapes = encoded => { + +/** + * Decodes a string from the "compactOrdered" format. + * + * @type {(encoded: string) => string} + */ +const decodeCompactString = encoded => { return encoded.slice(1).replace(/([!_])(.|\n)?/g, (esc, prefix, suffix) => { switch (esc) { case '!_': @@ -282,18 +293,30 @@ const decodeStringWithEscapes = encoded => { }); }; -const encodeStringWithoutEscapes = str => `s${str}`; -const decodeStringWithoutEscapes = encoded => encoded.slice(1); +/** + * Encodes a string by simple prefixing for use in the "legacyOrdered" format. + * + * @type {(str: string) => string} + */ +const encodeLegacyString = str => `s${str}`; + +/** + * Decodes a string from the "legacyOrdered" format. + * + * @type {(encoded: string) => string} + */ +const decodeLegacyString = encoded => encoded.slice(1); /** - * Encodes an array into a sequence of encoded elements, each terminated by a - * space (which is part of the escaped range in encoded strings). + * Encodes an array into a sequence of encoded elements for use in the "compactOrdered" + * format, each terminated by a space (which is part of the escaped range in + * "compactOrdered" encoded strings). * * @param {unknown[]} array * @param {(p: Passable) => string} encodePassable * @returns {string} */ -const encodeArrayWithoutEscapes = (array, encodePassable) => { +const encodeCompactArray = (array, encodePassable) => { const chars = ['^']; for (const element of array) { const enc = encodePassable(element); @@ -308,7 +331,7 @@ const encodeArrayWithoutEscapes = (array, encodePassable) => { * @param {number} [skip] * @returns {Array} */ -const decodeArrayWithoutEscapes = (encoded, decodePassable, skip = 0) => { +const decodeCompactArray = (encoded, decodePassable, skip = 0) => { const elements = []; let depth = 0; // Scan encoded rather than its tail to avoid slow `substring` in XS. @@ -355,9 +378,9 @@ const decodeArrayWithoutEscapes = (encoded, decodePassable, skip = 0) => { }; /** - * Performs the original array encoding, which escapes all array elements rather - * than just strings (`\u0000` as the element terminator and `\u0001` as the - * escape prefix for `\u0000` or `\u0001`). + * Performs the original array encoding, which escapes all encoded array + * elements rather than just strings (`\u0000` as the element terminator and + * `\u0001` as the escape prefix for `\u0000` or `\u0001`). * This necessitated an undesirable amount of iteration and expansion; see * https://github.com/endojs/endo/pull/1260#discussion_r960369826 * @@ -365,7 +388,7 @@ const decodeArrayWithoutEscapes = (encoded, decodePassable, skip = 0) => { * @param {(p: Passable) => string} encodePassable * @returns {string} */ -const encodeArrayWithEscapes = (array, encodePassable) => { +const encodeLegacyArray = (array, encodePassable) => { const chars = ['[']; for (const element of array) { const enc = encodePassable(element); @@ -386,7 +409,7 @@ const encodeArrayWithEscapes = (array, encodePassable) => { * @param {number} [skip] * @returns {Array} */ -const decodeArrayWithEscapes = (encoded, decodePassable, skip = 0) => { +const decodeLegacyArray = (encoded, decodePassable, skip = 0) => { const elements = []; const elemChars = []; // Use a string iterator to avoid slow indexed access in XS. @@ -493,7 +516,7 @@ const assertEncodedError = encoding => { * error: Error, * encodeRecur: (p: Passable) => string, * ) => string} [encodeError] - * @property {boolean} [xxx] + * @property {'legacyOrdered' | 'compactOrdered'} [format] */ /** @@ -505,13 +528,23 @@ export const makeEncodePassable = (encodeOptions = {}) => { encodeRemotable = (rem, _) => Fail`remotable unexpected: ${rem}`, encodePromise = (prom, _) => Fail`promise unexpected: ${prom}`, encodeError = (err, _) => Fail`error unexpected: ${err}`, - xxx = false, + format = 'legacyOrdered', } = encodeOptions; - const encodeString = xxx - ? encodeStringWithEscapes - : encodeStringWithoutEscapes; - const encodeArray = xxx ? encodeArrayWithoutEscapes : encodeArrayWithEscapes; + let formatPrefix; + let encodeString; + let encodeArray; + if (format === 'legacyOrdered') { + formatPrefix = ''; + encodeString = encodeLegacyString; + encodeArray = encodeLegacyArray; + } else if (format === 'compactOrdered') { + formatPrefix = '~'; + encodeString = encodeCompactString; + encodeArray = encodeCompactArray; + } else { + Fail`Unrecognized format: ${q(format)}`; + } const innerEncode = passable => { if (isErrorLike(passable)) { @@ -584,10 +617,7 @@ export const makeEncodePassable = (encodeOptions = {}) => { } } }; - const encodePassable = xxx - ? // A leading "~" indicates the v2 encoding (with escaping in strings rather than arrays). - passable => `~${innerEncode(passable)}` - : innerEncode; + const encodePassable = passable => `${formatPrefix}${innerEncode(passable)}`; return harden(encodePassable); }; harden(makeEncodePassable); @@ -681,15 +711,12 @@ export const makeDecodePassable = (decodeOptions = {}) => { // A leading "~" indicates the v2 encoding (with escaping in strings rather than arrays). if (encoded.startsWith('~')) { const innerDecode = makeInnerDecode( - decodeStringWithEscapes, - decodeArrayWithoutEscapes, + decodeCompactString, + decodeCompactArray, ); return innerDecode(encoded.slice(1)); } - const innerDecode = makeInnerDecode( - decodeStringWithoutEscapes, - decodeArrayWithEscapes, - ); + const innerDecode = makeInnerDecode(decodeLegacyString, decodeLegacyArray); return innerDecode(encoded); }; return harden(decodePassable); diff --git a/packages/marshal/test/test-encodePassable.js b/packages/marshal/test/test-encodePassable.js index 5da9a2ea2a..8aa71cb114 100644 --- a/packages/marshal/test/test-encodePassable.js +++ b/packages/marshal/test/test-encodePassable.js @@ -70,7 +70,7 @@ const encodePassableInternal2 = makeEncodePassable({ encodeRemotable: r => encodeThing('r', r), encodePromise: p => encodeThing('?', p), encodeError: er => encodeThing('!', er), - xxx: true, + format: 'compactOrdered', }); const encodePassable = passable => { @@ -93,6 +93,24 @@ const decodePassable = encoded => { return decodePassableInternal(encoded); }; +test('makeEncodePassable argument validation', t => { + t.notThrows(() => makeEncodePassable(), 'must accept zero arguments'); + t.notThrows(() => makeEncodePassable({}), 'must accept empty options'); + t.notThrows( + () => makeEncodePassable({ format: 'legacyOrdered' }), + 'must accept format: "legacyOrdered"', + ); + t.notThrows( + () => makeEncodePassable({ format: 'compactOrdered' }), + 'must accept format: "compactOrdered"', + ); + t.throws( + () => makeEncodePassable({ format: 'newHotness' }), + { message: /^Unrecognized format\b/ }, + 'must reject unknown format', + ); +}); + const { comparator: compareFull } = makeComparatorKit(compareRemotables); const asNumber = new Float64Array(1); From 1486557a29f0fb0feb704345b4c8a105b01d27c9 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 12 Sep 2023 18:38:58 -0400 Subject: [PATCH 08/14] chore(marshal): Escape remotable/promise/error encodings in compactOrdered format --- packages/marshal/src/encodePassable.js | 117 +++++++++++-------- packages/marshal/test/test-encodePassable.js | 67 +++++++++++ 2 files changed, 137 insertions(+), 47 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index 05a4bbc4a2..eee8fc076d 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -262,16 +262,16 @@ stringEscapes['_'.charCodeAt(0)] = '__'; * * @type {(str: string) => string} */ -const encodeCompactString = str => - `s${str.replace(/[\0-!^_]/g, ch => stringEscapes[ch.charCodeAt(0)])}`; +const encodeCompactStringSuffix = str => + str.replace(/[\0-!^_]/g, ch => stringEscapes[ch.charCodeAt(0)]); /** * Decodes a string from the "compactOrdered" format. * * @type {(encoded: string) => string} */ -const decodeCompactString = encoded => { - return encoded.slice(1).replace(/([!_])(.|\n)?/g, (esc, prefix, suffix) => { +const decodeCompactStringSuffix = encoded => { + return encoded.replace(/([!_])(.|\n)?/g, (esc, prefix, suffix) => { switch (esc) { case '!_': return ' '; @@ -294,18 +294,18 @@ const decodeCompactString = encoded => { }; /** - * Encodes a string by simple prefixing for use in the "legacyOrdered" format. + * Trivially identity-encodes a string for use in the "legacyOrdered" format. * * @type {(str: string) => string} */ -const encodeLegacyString = str => `s${str}`; +const encodeLegacyStringSuffix = str => str; /** - * Decodes a string from the "legacyOrdered" format. + * Trivially identity-decodes a string from the "legacyOrdered" format. * * @type {(encoded: string) => string} */ -const decodeLegacyString = encoded => encoded.slice(1); +const decodeLegacyStringSuffix = encoded => encoded; /** * Encodes an array into a sequence of encoded elements for use in the "compactOrdered" @@ -487,19 +487,34 @@ const decodeTagged = (encoded, decodeArray, decodePassable) => { return makeTagged(tag, payload); }; -const assertEncodedRemotable = encoding => { - encoding.charAt(0) === 'r' || - Fail`internal: Remotable encoding must start with "r": ${encoding}`; +const makeEncodeRemotable = (unsafeEncodeRemotable, encodeStringSuffix) => { + const encodeRemotable = (r, innerEncode) => { + const encoding = unsafeEncodeRemotable(r, innerEncode); + (typeof encoding === 'string' && encoding.charAt(0) === 'r') || + Fail`internal: Remotable encoding must start with "r": ${encoding}`; + return `r${encodeStringSuffix(encoding.slice(1))}`; + }; + return encodeRemotable; }; -const assertEncodedPromise = encoding => { - encoding.charAt(0) === '?' || - Fail`internal: Promise encoding must start with "?": ${encoding}`; +const makeEncodePromise = (unsafeEncodePromise, encodeStringSuffix) => { + const encodePromise = (p, innerEncode) => { + const encoding = unsafeEncodePromise(p, innerEncode); + (typeof encoding === 'string' && encoding.charAt(0) === '?') || + Fail`internal: Promise encoding must start with "?": ${encoding}`; + return `?${encodeStringSuffix(encoding.slice(1))}`; + }; + return encodePromise; }; -const assertEncodedError = encoding => { - encoding.charAt(0) === '!' || - Fail`internal: Error encoding must start with "!": ${encoding}`; +const makeEncodeError = (unsafeEncodeError, encodeStringSuffix) => { + const encodeError = (err, innerEncode) => { + const encoding = unsafeEncodeError(err, innerEncode); + (typeof encoding === 'string' && encoding.charAt(0) === '!') || + Fail`internal: Error encoding must start with "!": ${encoding}`; + return `!${encodeStringSuffix(encoding.slice(1))}`; + }; + return encodeError; }; /** @@ -525,26 +540,37 @@ const assertEncodedError = encoding => { */ export const makeEncodePassable = (encodeOptions = {}) => { const { - encodeRemotable = (rem, _) => Fail`remotable unexpected: ${rem}`, - encodePromise = (prom, _) => Fail`promise unexpected: ${prom}`, - encodeError = (err, _) => Fail`error unexpected: ${err}`, + encodeRemotable: unsafeEncodeRemotable = (r, _) => + Fail`remotable unexpected: ${r}`, + encodePromise: unsafeEncodePromise = (p, _) => + Fail`promise unexpected: ${p}`, + encodeError: unsafeEncodeError = (err, _) => Fail`error unexpected: ${err}`, format = 'legacyOrdered', } = encodeOptions; let formatPrefix; - let encodeString; + let encodeStringSuffix; let encodeArray; if (format === 'legacyOrdered') { formatPrefix = ''; - encodeString = encodeLegacyString; + encodeStringSuffix = encodeLegacyStringSuffix; encodeArray = encodeLegacyArray; } else if (format === 'compactOrdered') { formatPrefix = '~'; - encodeString = encodeCompactString; + encodeStringSuffix = encodeCompactStringSuffix; encodeArray = encodeCompactArray; } else { Fail`Unrecognized format: ${q(format)}`; } + const encodeRemotable = makeEncodeRemotable( + unsafeEncodeRemotable, + encodeStringSuffix, + ); + const encodePromise = makeEncodePromise( + unsafeEncodePromise, + encodeStringSuffix, + ); + const encodeError = makeEncodeError(unsafeEncodeError, encodeStringSuffix); const innerEncode = passable => { if (isErrorLike(passable)) { @@ -558,9 +584,7 @@ export const makeEncodePassable = (encodeOptions = {}) => { // more interested in reporting whatever diagnostic information they // carry than we are about reporting problems encountered in reporting // this information. - const result = encodeError(passable, innerEncode); - assertEncodedError(result); - return result; + return encodeError(passable, innerEncode); } const passStyle = passStyleOf(passable); switch (passStyle) { @@ -574,7 +598,7 @@ export const makeEncodePassable = (encodeOptions = {}) => { return encodeBinary64(passable); } case 'string': { - return encodeString(passable); + return `s${encodeStringSuffix(passable)}`; } case 'boolean': { return `b${passable}`; @@ -583,25 +607,18 @@ export const makeEncodePassable = (encodeOptions = {}) => { return encodeBigInt(passable); } case 'remotable': { - const result = encodeRemotable(passable, innerEncode); - assertEncodedRemotable(result); - return result; + return encodeRemotable(passable, innerEncode); } case 'error': { - const result = encodeError(passable, innerEncode); - assertEncodedError(result); - return result; + return encodeError(passable, innerEncode); } case 'promise': { - const result = encodePromise(passable, innerEncode); - assertEncodedPromise(result); - return result; + return encodePromise(passable, innerEncode); } case 'symbol': { // Strings and symbols share encoding logic. - // Encode the name as a string, then replace the `s` prefix. - const encName = encodeString(nameForPassableSymbol(passable)); - return `y${encName.slice(1)}`; + const name = nameForPassableSymbol(passable); + return `y${encodeStringSuffix(name)}`; } case 'copyArray': { return encodeArray(passable, innerEncode); @@ -649,7 +666,7 @@ export const makeDecodePassable = (decodeOptions = {}) => { decodeError = (err, _) => Fail`error unexpected: ${err}`, } = decodeOptions; - const makeInnerDecode = (decodeString, decodeArray) => { + const makeInnerDecode = (decodeStringSuffix, decodeArray) => { const innerDecode = encoded => { switch (encoded.charAt(0)) { case 'v': { @@ -662,7 +679,7 @@ export const makeDecodePassable = (decodeOptions = {}) => { return decodeBinary64(encoded); } case 's': { - return decodeString(encoded); + return decodeStringSuffix(encoded.slice(1)); } case 'b': { if (encoded === 'btrue') { @@ -677,17 +694,20 @@ export const makeDecodePassable = (decodeOptions = {}) => { return decodeBigInt(encoded); } case 'r': { - return decodeRemotable(encoded, innerDecode); + const normalized = `r${decodeStringSuffix(encoded.slice(1))}`; + return decodeRemotable(normalized, innerDecode); } case '?': { - return decodePromise(encoded, innerDecode); + const normalized = `?${decodeStringSuffix(encoded.slice(1))}`; + return decodePromise(normalized, innerDecode); } case '!': { - return decodeError(encoded, innerDecode); + const normalized = `!${decodeStringSuffix(encoded.slice(1))}`; + return decodeError(normalized, innerDecode); } case 'y': { // Strings and symbols share decoding logic. - const name = decodeString(encoded); + const name = decodeStringSuffix(encoded.slice(1)); return passableSymbolForName(name); } case '[': @@ -711,12 +731,15 @@ export const makeDecodePassable = (decodeOptions = {}) => { // A leading "~" indicates the v2 encoding (with escaping in strings rather than arrays). if (encoded.startsWith('~')) { const innerDecode = makeInnerDecode( - decodeCompactString, + decodeCompactStringSuffix, decodeCompactArray, ); return innerDecode(encoded.slice(1)); } - const innerDecode = makeInnerDecode(decodeLegacyString, decodeLegacyArray); + const innerDecode = makeInnerDecode( + decodeLegacyStringSuffix, + decodeLegacyArray, + ); return innerDecode(encoded); }; return harden(decodePassable); diff --git a/packages/marshal/test/test-encodePassable.js b/packages/marshal/test/test-encodePassable.js index 8aa71cb114..175a63b913 100644 --- a/packages/marshal/test/test-encodePassable.js +++ b/packages/marshal/test/test-encodePassable.js @@ -5,6 +5,7 @@ import { test } from './prepare-test-env-ava.js'; // eslint-disable-next-line import/order import { fc } from '@fast-check/ava'; +import { Remotable } from '@endo/pass-style'; import { arbPassable } from '@endo/pass-style/tools.js'; import { Fail, q } from '@endo/errors'; @@ -164,6 +165,72 @@ test('golden round trips', t => { t.is(decodePassable('f0000000000000000'), NaN); }); +test('capability encoding', t => { + const allAscii = Array(128) + .fill() + .map((_, i) => String.fromCharCode(i)) + .join(''); + const encoders = { + encodeRemotable: _r => `r${allAscii}`, + encodePromise: _p => `?${allAscii}`, + encodeError: _err => `!${allAscii}`, + }; + + const data = harden([Remotable(), new Promise(() => {}), Error('Foo')]); + const decoders = Object.fromEntries( + Object.entries(encoders).map(([encoderName, encoder], i) => { + const decoderName = encoderName.replace('encode', 'decode'); + const decoder = encoded => { + t.is( + encoded, + encoder(undefined), + `encoding-level escaping must be invisible in ${decoderName}`, + ); + return data[i]; + }; + return [decoderName, decoder]; + }), + ); + const decodeAsciiPassable = makeDecodePassable(decoders); + + const encodePassableLegacy = makeEncodePassable(encoders); + const dataLegacy = encodePassableLegacy(data); + t.is( + dataLegacy, + `[${['r', '?', '!'] + .map( + prefix => + // eslint-disable-next-line no-control-regex + `${prefix}${allAscii.replace(/[\x00\x01]/g, '\u0001$&')}\u0000`, + ) + .join('')}`, + 'legacyOrdered format must escape U+0000 and U+0001 in deep remotable/promise/error encodings', + ); + t.is(encodePassableLegacy(decodeAsciiPassable(dataLegacy)), dataLegacy); + + const encodePassableCompact = makeEncodePassable({ + format: 'compactOrdered', + ...encoders, + }); + const dataCompact = encodePassableCompact(data); + // eslint-disable-next-line no-control-regex + const allAsciiCompact = allAscii.replace(/[\0-\x1F !^_]/g, ch => { + if (ch === ' ') return '!_'; + if (ch === '!') return '!|'; + if (ch === '^') return '_@'; + if (ch === '_') return '__'; + return `!${String.fromCharCode(ch.charCodeAt(0) + 0x21)}`; + }); + t.is( + dataCompact, + `~^${['r', '?', '!'] + .map(prefix => `${prefix}${allAsciiCompact} `) + .join('')}`, + 'compactOrdered format must escape U+0000 through U+001F, space, exclamation, caret, and underscore in remotable/promise/error encodings', + ); + t.is(encodePassableCompact(decodeAsciiPassable(dataCompact)), dataCompact); +}); + const orderInvariants = (x, y) => { const rankComp = compareRank(x, y); const fullComp = compareFull(x, y); From 043039f273802cc74806de71294425fc01384a9d Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 9 Jan 2024 03:31:46 -0500 Subject: [PATCH 09/14] feat(marshal): Introduce makePassableKit and support arbitrary remotable/promise/error compactOrdered encodings (rather than forcing everything through string encoding) --- packages/marshal/src/encodePassable.js | 302 +++++++++++-------- packages/marshal/test/test-encodePassable.js | 129 ++++++-- 2 files changed, 288 insertions(+), 143 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index eee8fc076d..d30ce00a98 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -18,8 +18,9 @@ import { */ /** @typedef {import('./types.js').RankCover} RankCover */ -import { q, Fail } from '@endo/errors'; +import { b, q, Fail } from '@endo/errors'; +const { isArray } = Array; const { fromEntries, is } = Object; const { ownKeys } = Reflect; @@ -487,32 +488,35 @@ const decodeTagged = (encoded, decodeArray, decodePassable) => { return makeTagged(tag, payload); }; -const makeEncodeRemotable = (unsafeEncodeRemotable, encodeStringSuffix) => { +const makeEncodeRemotable = (unsafeEncodeRemotable, verifyEncoding) => { const encodeRemotable = (r, innerEncode) => { const encoding = unsafeEncodeRemotable(r, innerEncode); (typeof encoding === 'string' && encoding.charAt(0) === 'r') || Fail`internal: Remotable encoding must start with "r": ${encoding}`; - return `r${encodeStringSuffix(encoding.slice(1))}`; + verifyEncoding(encoding, 'Remotable'); + return encoding; }; return encodeRemotable; }; -const makeEncodePromise = (unsafeEncodePromise, encodeStringSuffix) => { +const makeEncodePromise = (unsafeEncodePromise, verifyEncoding) => { const encodePromise = (p, innerEncode) => { const encoding = unsafeEncodePromise(p, innerEncode); (typeof encoding === 'string' && encoding.charAt(0) === '?') || Fail`internal: Promise encoding must start with "?": ${encoding}`; - return `?${encodeStringSuffix(encoding.slice(1))}`; + verifyEncoding(encoding, 'Promise'); + return encoding; }; return encodePromise; }; -const makeEncodeError = (unsafeEncodeError, encodeStringSuffix) => { +const makeEncodeError = (unsafeEncodeError, verifyEncoding) => { const encodeError = (err, innerEncode) => { const encoding = unsafeEncodeError(err, innerEncode); (typeof encoding === 'string' && encoding.charAt(0) === '!') || Fail`internal: Error encoding must start with "!": ${encoding}`; - return `!${encodeStringSuffix(encoding.slice(1))}`; + verifyEncoding(encoding, 'Error'); + return encoding; }; return encodeError; }; @@ -535,42 +539,24 @@ const makeEncodeError = (unsafeEncodeError, encodeStringSuffix) => { */ /** - * @param {EncodeOptions} [encodeOptions] - * @returns {(passable: Passable) => string} + * @param {(str: string) => string} encodeStringSuffix + * @param {(arr: unknown[], encodeRecur: (p: Passable) => string) => string} encodeArray + * @param {Required & {verifyEncoding?: (encoded: string, label: string) => void}} options + * @returns {(p: Passable) => string} */ -export const makeEncodePassable = (encodeOptions = {}) => { +const makeInnerEncode = (encodeStringSuffix, encodeArray, options) => { const { - encodeRemotable: unsafeEncodeRemotable = (r, _) => - Fail`remotable unexpected: ${r}`, - encodePromise: unsafeEncodePromise = (p, _) => - Fail`promise unexpected: ${p}`, - encodeError: unsafeEncodeError = (err, _) => Fail`error unexpected: ${err}`, - format = 'legacyOrdered', - } = encodeOptions; - - let formatPrefix; - let encodeStringSuffix; - let encodeArray; - if (format === 'legacyOrdered') { - formatPrefix = ''; - encodeStringSuffix = encodeLegacyStringSuffix; - encodeArray = encodeLegacyArray; - } else if (format === 'compactOrdered') { - formatPrefix = '~'; - encodeStringSuffix = encodeCompactStringSuffix; - encodeArray = encodeCompactArray; - } else { - Fail`Unrecognized format: ${q(format)}`; - } + encodeRemotable: unsafeEncodeRemotable, + encodePromise: unsafeEncodePromise, + encodeError: unsafeEncodeError, + verifyEncoding = () => {}, + } = options; const encodeRemotable = makeEncodeRemotable( unsafeEncodeRemotable, - encodeStringSuffix, - ); - const encodePromise = makeEncodePromise( - unsafeEncodePromise, - encodeStringSuffix, + verifyEncoding, ); - const encodeError = makeEncodeError(unsafeEncodeError, encodeStringSuffix); + const encodePromise = makeEncodePromise(unsafeEncodePromise, verifyEncoding); + const encodeError = makeEncodeError(unsafeEncodeError, verifyEncoding); const innerEncode = passable => { if (isErrorLike(passable)) { @@ -618,6 +604,7 @@ export const makeEncodePassable = (encodeOptions = {}) => { case 'symbol': { // Strings and symbols share encoding logic. const name = nameForPassableSymbol(passable); + assert.typeof(name, 'string'); return `y${encodeStringSuffix(name)}`; } case 'copyArray': { @@ -634,10 +621,8 @@ export const makeEncodePassable = (encodeOptions = {}) => { } } }; - const encodePassable = passable => `${formatPrefix}${innerEncode(passable)}`; - return harden(encodePassable); + return innerEncode; }; -harden(makeEncodePassable); /** * @typedef {object} DecodeOptions @@ -655,94 +640,175 @@ harden(makeEncodePassable); * ) => Error} [decodeError] */ +const liberalDecoders = /** @type {Required} */ ( + /** @type {unknown} */ ({ + decodeRemotable: (_encoding, _innerDecode) => undefined, + decodePromise: (_encoding, _innerDecode) => undefined, + decodeError: (_encoding, _innerDecode) => undefined, + }) +); + /** - * @param {DecodeOptions} [decodeOptions] + * @param {(encoded: string) => string} decodeStringSuffix + * @param {(encoded: string, decodeRecur: (e: string) => Passable) => unknown[]} decodeArray + * @param {Required} options * @returns {(encoded: string) => Passable} */ -export const makeDecodePassable = (decodeOptions = {}) => { - const { - decodeRemotable = (rem, _) => Fail`remotable unexpected: ${rem}`, - decodePromise = (prom, _) => Fail`promise unexpected: ${prom}`, - decodeError = (err, _) => Fail`error unexpected: ${err}`, - } = decodeOptions; - - const makeInnerDecode = (decodeStringSuffix, decodeArray) => { - const innerDecode = encoded => { - switch (encoded.charAt(0)) { - case 'v': { - return null; - } - case 'z': { - return undefined; - } - case 'f': { - return decodeBinary64(encoded); - } - case 's': { - return decodeStringSuffix(encoded.slice(1)); - } - case 'b': { - if (encoded === 'btrue') { - return true; - } else if (encoded === 'bfalse') { - return false; - } - throw Fail`expected encoded boolean to be "btrue" or "bfalse": ${encoded}`; - } - case 'n': - case 'p': { - return decodeBigInt(encoded); - } - case 'r': { - const normalized = `r${decodeStringSuffix(encoded.slice(1))}`; - return decodeRemotable(normalized, innerDecode); - } - case '?': { - const normalized = `?${decodeStringSuffix(encoded.slice(1))}`; - return decodePromise(normalized, innerDecode); - } - case '!': { - const normalized = `!${decodeStringSuffix(encoded.slice(1))}`; - return decodeError(normalized, innerDecode); - } - case 'y': { - // Strings and symbols share decoding logic. - const name = decodeStringSuffix(encoded.slice(1)); - return passableSymbolForName(name); - } - case '[': - case '^': { - return decodeArray(encoded, innerDecode); - } - case '(': { - return decodeRecord(encoded, decodeArray, innerDecode); - } - case ':': { - return decodeTagged(encoded, decodeArray, innerDecode); - } - default: { - throw Fail`invalid database key: ${encoded}`; +const makeInnerDecode = (decodeStringSuffix, decodeArray, options) => { + const { decodeRemotable, decodePromise, decodeError } = options; + const innerDecode = encoded => { + switch (encoded.charAt(0)) { + case 'v': { + return null; + } + case 'z': { + return undefined; + } + case 'f': { + return decodeBinary64(encoded); + } + case 's': { + return decodeStringSuffix(encoded.slice(1)); + } + case 'b': { + if (encoded === 'btrue') { + return true; + } else if (encoded === 'bfalse') { + return false; } + throw Fail`expected encoded boolean to be "btrue" or "bfalse": ${encoded}`; } - }; - return innerDecode; + case 'n': + case 'p': { + return decodeBigInt(encoded); + } + case 'r': { + return decodeRemotable(encoded, innerDecode); + } + case '?': { + return decodePromise(encoded, innerDecode); + } + case '!': { + return decodeError(encoded, innerDecode); + } + case 'y': { + // Strings and symbols share decoding logic. + const name = decodeStringSuffix(encoded.slice(1)); + return passableSymbolForName(name); + } + case '[': + case '^': { + return decodeArray(encoded, innerDecode); + } + case '(': { + return decodeRecord(encoded, decodeArray, innerDecode); + } + case ':': { + return decodeTagged(encoded, decodeArray, innerDecode); + } + default: { + throw Fail`invalid database key: ${encoded}`; + } + } }; + return innerDecode; +}; + +/** + * @typedef {object} PassableKit + * @property {ReturnType} encodePassable + * @property {ReturnType} decodePassable + */ + +/** + * @param {EncodeOptions & DecodeOptions} [options] + * @returns {PassableKit} + */ +export const makePassableKit = (options = {}) => { + const { + encodeRemotable = (r, _) => Fail`remotable unexpected: ${r}`, + encodePromise = (p, _) => Fail`promise unexpected: ${p}`, + encodeError = (err, _) => Fail`error unexpected: ${err}`, + format = 'legacyOrdered', + + decodeRemotable = (encoding, _) => Fail`remotable unexpected: ${encoding}`, + decodePromise = (encoding, _) => Fail`promise unexpected: ${encoding}`, + decodeError = (encoding, _) => Fail`error unexpected: ${encoding}`, + } = options; + + /** @type {PassableKit['encodePassable']} */ + let encodePassable; + const encodeOptions = { encodeRemotable, encodePromise, encodeError, format }; + if (format === 'compactOrdered') { + const liberalDecode = makeInnerDecode( + decodeCompactStringSuffix, + decodeCompactArray, + liberalDecoders, + ); + const verifyEncoding = (encoding, label) => { + const decoded = decodeCompactArray(`^s ${encoding} s `, liberalDecode); + (isArray(decoded) && + decoded.length === 3 && + decoded[0] === '' && + decoded[2] === '') || + Fail`internal: ${b(label)} encoding must be embeddable: ${encoding}`; + }; + const encodeCompact = makeInnerEncode( + encodeCompactStringSuffix, + encodeCompactArray, + { ...encodeOptions, verifyEncoding }, + ); + encodePassable = passable => `~${encodeCompact(passable)}`; + } else if (format === 'legacyOrdered') { + encodePassable = makeInnerEncode( + encodeLegacyStringSuffix, + encodeLegacyArray, + encodeOptions, + ); + } else { + throw Fail`Unrecognized format: ${q(format)}`; + } + + const decodeOptions = { decodeRemotable, decodePromise, decodeError }; + const decodeCompact = makeInnerDecode( + decodeCompactStringSuffix, + decodeCompactArray, + decodeOptions, + ); + const decodeLegacy = makeInnerDecode( + decodeLegacyStringSuffix, + decodeLegacyArray, + decodeOptions, + ); const decodePassable = encoded => { // A leading "~" indicates the v2 encoding (with escaping in strings rather than arrays). if (encoded.startsWith('~')) { - const innerDecode = makeInnerDecode( - decodeCompactStringSuffix, - decodeCompactArray, - ); - return innerDecode(encoded.slice(1)); + return decodeCompact(encoded.slice(1)); } - const innerDecode = makeInnerDecode( - decodeLegacyStringSuffix, - decodeLegacyArray, - ); - return innerDecode(encoded); + return decodeLegacy(encoded); }; - return harden(decodePassable); + + return harden({ encodePassable, decodePassable }); +}; +harden(makePassableKit); + +/** + * @param {EncodeOptions} [encodeOptions] + * @returns {PassableKit['encodePassable']} + */ +export const makeEncodePassable = encodeOptions => { + const { encodePassable } = makePassableKit(encodeOptions); + return encodePassable; +}; +harden(makeEncodePassable); + +/** + * @param {DecodeOptions} [decodeOptions] + * @returns {PassableKit['decodePassable']} + */ +export const makeDecodePassable = decodeOptions => { + const { decodePassable } = makePassableKit(decodeOptions); + return decodePassable; }; harden(makeDecodePassable); diff --git a/packages/marshal/test/test-encodePassable.js b/packages/marshal/test/test-encodePassable.js index 175a63b913..a043bfc00b 100644 --- a/packages/marshal/test/test-encodePassable.js +++ b/packages/marshal/test/test-encodePassable.js @@ -12,6 +12,7 @@ import { Fail, q } from '@endo/errors'; // eslint-disable-next-line import/no-extraneous-dependencies import { + makePassableKit, makeEncodePassable, makeDecodePassable, } from '../src/encodePassable.js'; @@ -94,24 +95,48 @@ const decodePassable = encoded => { return decodePassableInternal(encoded); }; -test('makeEncodePassable argument validation', t => { - t.notThrows(() => makeEncodePassable(), 'must accept zero arguments'); - t.notThrows(() => makeEncodePassable({}), 'must accept empty options'); - t.notThrows( - () => makeEncodePassable({ format: 'legacyOrdered' }), - 'must accept format: "legacyOrdered"', - ); - t.notThrows( - () => makeEncodePassable({ format: 'compactOrdered' }), - 'must accept format: "compactOrdered"', - ); - t.throws( - () => makeEncodePassable({ format: 'newHotness' }), - { message: /^Unrecognized format\b/ }, - 'must reject unknown format', +test('makePassableKit output shape', t => { + const kit = makePassableKit(); + t.deepEqual(Reflect.ownKeys(kit).sort(), [ + 'decodePassable', + 'encodePassable', + ]); + t.deepEqual( + Object.fromEntries( + Object.entries(kit).map(([key, value]) => [key, typeof value]), + ), + { encodePassable: 'function', decodePassable: 'function' }, ); }); +const verifyEncodeOptions = test.macro({ + title: label => `${label} encode options validation`, + // eslint-disable-next-line no-shadow + exec: (t, makeEncodePassable) => { + t.notThrows(() => makeEncodePassable(), 'must accept zero arguments'); + t.notThrows(() => makeEncodePassable({}), 'must accept empty options'); + t.notThrows( + () => makeEncodePassable({ format: 'legacyOrdered' }), + 'must accept format: "legacyOrdered"', + ); + t.notThrows( + () => makeEncodePassable({ format: 'compactOrdered' }), + 'must accept format: "compactOrdered"', + ); + t.throws( + () => makeEncodePassable({ format: 'newHotness' }), + { message: /^Unrecognized format\b/ }, + 'must reject unknown format', + ); + }, +}); +test('makeEncodePassable', verifyEncodeOptions, makeEncodePassable); +test( + 'makePassableKit', + verifyEncodeOptions, + (...args) => makePassableKit(...args).encodePassable, +); + const { comparator: compareFull } = makeComparatorKit(compareRemotables); const asNumber = new Float64Array(1); @@ -170,30 +195,35 @@ test('capability encoding', t => { .fill() .map((_, i) => String.fromCharCode(i)) .join(''); + const forceSigil = (str, newSigil) => newSigil + str.slice(1); const encoders = { - encodeRemotable: _r => `r${allAscii}`, - encodePromise: _p => `?${allAscii}`, - encodeError: _err => `!${allAscii}`, + encodeRemotable: (_r, encodeRecur) => + forceSigil(encodeRecur(allAscii), 'r'), + encodePromise: (_p, encodeRecur) => forceSigil(encodeRecur(allAscii), '?'), + encodeError: (_err, encodeRecur) => forceSigil(encodeRecur(allAscii), '!'), }; const data = harden([Remotable(), new Promise(() => {}), Error('Foo')]); const decoders = Object.fromEntries( - Object.entries(encoders).map(([encoderName, encoder], i) => { + Object.keys(encoders).map((encoderName, i) => { const decoderName = encoderName.replace('encode', 'decode'); - const decoder = encoded => { + const decoder = (encoded, decodeRecur) => { + const decodedString = decodeRecur(forceSigil(encoded, 's')); t.is( - encoded, - encoder(undefined), - `encoding-level escaping must be invisible in ${decoderName}`, + decodedString, + allAscii, + `encoding must be reversible in ${decoderName}`, ); return data[i]; }; return [decoderName, decoder]; }), ); - const decodeAsciiPassable = makeDecodePassable(decoders); - const encodePassableLegacy = makeEncodePassable(encoders); + const { + encodePassable: encodePassableLegacy, + decodePassable: decodeAsciiPassable, + } = makePassableKit({ ...encoders, ...decoders }); const dataLegacy = encodePassableLegacy(data); t.is( dataLegacy, @@ -231,6 +261,55 @@ test('capability encoding', t => { t.is(encodePassableCompact(decodeAsciiPassable(dataCompact)), dataCompact); }); +test('capability encoding validity constraints', t => { + const r = Remotable(); + let encoding; + const customEncode = makeEncodePassable({ + format: 'compactOrdered', + encodeRemotable: _r => encoding, + }); + const tryCustomEncode = () => { + const encoded = customEncode(r); + return encoded; + }; + + t.throws(tryCustomEncode, { message: /must start with "r"/ }); + + encoding = '?'; + t.throws(tryCustomEncode, { message: /must start with "r"/ }); + + encoding = 'r '; + t.throws(tryCustomEncode, { message: /unexpected array element terminator/ }); + + encoding = 'r s'; + t.throws(tryCustomEncode, { message: /must be embeddable/ }); + + encoding = 'r^^'; + t.throws(tryCustomEncode, { message: /unterminated array/ }); + + encoding = 'r'; + t.notThrows(tryCustomEncode, 'empty custom encoding is acceptable'); + + encoding = `r${encodePassableInternal2(harden([]))}`; + t.notThrows( + tryCustomEncode, + 'custom encoding containing an empty array is acceptable', + ); + + encoding = `r${encodePassableInternal2(harden(['foo', []]))}`; + t.notThrows( + tryCustomEncode, + 'custom encoding containing a non-empty array is acceptable', + ); + + // TODO turn into rejection? + encoding = `r\x04!`; + t.notThrows( + tryCustomEncode, + 'custom encoding is not constrained to use string escaping', + ); +}); + const orderInvariants = (x, y) => { const rankComp = compareRank(x, y); const fullComp = compareFull(x, y); From 0afce72d0f28782da3618ad6d4b6821cfee41e9b Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Tue, 9 Jan 2024 03:33:18 -0500 Subject: [PATCH 10/14] chore(marshal): Reject strings containing characters that *should* be escaped --- packages/marshal/src/encodePassable.js | 4 +-- packages/marshal/test/test-encodePassable.js | 35 ++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index d30ce00a98..33b82bc7ae 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -272,7 +272,7 @@ const encodeCompactStringSuffix = str => * @type {(encoded: string) => string} */ const decodeCompactStringSuffix = encoded => { - return encoded.replace(/([!_])(.|\n)?/g, (esc, prefix, suffix) => { + return encoded.replace(/([\0-!_])(.|\n)?/g, (esc, prefix, suffix) => { switch (esc) { case '!_': return ' '; @@ -284,7 +284,7 @@ const decodeCompactStringSuffix = encoded => { return '_'; default: { const ch = /** @type {string} */ (suffix); - // The range of valid escapes is [(0x00+0x21)..(0x1F+0x21)], i.e. + // The range of valid `!`-escape suffixes is [(0x00+0x21)..(0x1F+0x21)], i.e. // [0x21..0x40] (U+0021 EXCLAMATION MARK to U+0040 COMMERCIAL AT). (prefix === '!' && suffix !== undefined && ch >= '!' && ch <= '@') || Fail`invalid string escape: ${q(esc)}`; diff --git a/packages/marshal/test/test-encodePassable.js b/packages/marshal/test/test-encodePassable.js index a043bfc00b..fbd744fe4c 100644 --- a/packages/marshal/test/test-encodePassable.js +++ b/packages/marshal/test/test-encodePassable.js @@ -261,6 +261,41 @@ test('capability encoding', t => { t.is(encodePassableCompact(decodeAsciiPassable(dataCompact)), dataCompact); }); +test('compact string validity', t => { + t.notThrows(() => decodePassableInternal('~sa"z')); + t.notThrows(() => decodePassableInternal('~sa!!z')); + const specialEscapes = ['!_', '!|', '_@', '__']; + for (const prefix of ['!', '_']) { + for (let cp = 0; cp <= 0x7f; cp += 1) { + const esc = `${prefix}${String.fromCodePoint(cp)}`; + const tryDecode = () => decodePassableInternal(`~sa${esc}z`); + if (esc.match(/![!-@]/) || specialEscapes.includes(esc)) { + t.notThrows(tryDecode, `valid string escape: ${JSON.stringify(esc)}`); + } else { + t.throws( + tryDecode, + { message: /invalid string escape/ }, + `invalid string escape: ${JSON.stringify(esc)}`, + ); + } + } + t.throws( + () => decodePassableInternal(`~sa${prefix}`), + { message: /invalid string escape/ }, + `unterminated ${JSON.stringify(prefix)} escape`, + ); + } + for (let cp = 0; cp < 0x20; cp += 1) { + const ch = String.fromCodePoint(cp); + const uCode = cp.toString(16).padStart(4, '0'); + t.throws( + () => decodePassableInternal(`~sa${ch}z`), + { message: /invalid string escape/ }, + `disallowed string control character: U+${uCode} ${JSON.stringify(ch)}`, + ); + } +}); + test('capability encoding validity constraints', t => { const r = Remotable(); let encoding; From d6abd2cbcf7a4b8db988406429bff99155342636 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Wed, 10 Jan 2024 13:02:16 -0500 Subject: [PATCH 11/14] chore(marshal): Disallow C0 control characters in custom encodings --- packages/marshal/src/encodePassable.js | 20 +++- packages/marshal/test/test-encodePassable.js | 96 +++++++++++--------- 2 files changed, 70 insertions(+), 46 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index 33b82bc7ae..e4e94c84b4 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -24,6 +24,9 @@ const { isArray } = Array; const { fromEntries, is } = Object; const { ownKeys } = Reflect; +// eslint-disable-next-line no-control-regex +const rC0 = /[\x00-\x1F]/; + /** * Assuming that `record` is a CopyRecord, we have only * string-named own properties. `recordNames` returns those name *reverse* @@ -492,7 +495,7 @@ const makeEncodeRemotable = (unsafeEncodeRemotable, verifyEncoding) => { const encodeRemotable = (r, innerEncode) => { const encoding = unsafeEncodeRemotable(r, innerEncode); (typeof encoding === 'string' && encoding.charAt(0) === 'r') || - Fail`internal: Remotable encoding must start with "r": ${encoding}`; + Fail`Remotable encoding must start with "r": ${encoding}`; verifyEncoding(encoding, 'Remotable'); return encoding; }; @@ -503,7 +506,7 @@ const makeEncodePromise = (unsafeEncodePromise, verifyEncoding) => { const encodePromise = (p, innerEncode) => { const encoding = unsafeEncodePromise(p, innerEncode); (typeof encoding === 'string' && encoding.charAt(0) === '?') || - Fail`internal: Promise encoding must start with "?": ${encoding}`; + Fail`Promise encoding must start with "?": ${encoding}`; verifyEncoding(encoding, 'Promise'); return encoding; }; @@ -514,7 +517,7 @@ const makeEncodeError = (unsafeEncodeError, verifyEncoding) => { const encodeError = (err, innerEncode) => { const encoding = unsafeEncodeError(err, innerEncode); (typeof encoding === 'string' && encoding.charAt(0) === '!') || - Fail`internal: Error encoding must start with "!": ${encoding}`; + Fail`Error encoding must start with "!": ${encoding}`; verifyEncoding(encoding, 'Error'); return encoding; }; @@ -745,13 +748,22 @@ export const makePassableKit = (options = {}) => { decodeCompactArray, liberalDecoders, ); + /** + * @param {string} encoding + * @param {string} label + * @returns {void} + */ const verifyEncoding = (encoding, label) => { + !encoding.match(rC0) || + Fail`${b( + label, + )} encoding must not contain a C0 control character: ${encoding}`; const decoded = decodeCompactArray(`^s ${encoding} s `, liberalDecode); (isArray(decoded) && decoded.length === 3 && decoded[0] === '' && decoded[2] === '') || - Fail`internal: ${b(label)} encoding must be embeddable: ${encoding}`; + Fail`${b(label)} encoding must be embeddable: ${encoding}`; }; const encodeCompact = makeInnerEncode( encodeCompactStringSuffix, diff --git a/packages/marshal/test/test-encodePassable.js b/packages/marshal/test/test-encodePassable.js index fbd744fe4c..1acd3da4a9 100644 --- a/packages/marshal/test/test-encodePassable.js +++ b/packages/marshal/test/test-encodePassable.js @@ -287,7 +287,7 @@ test('compact string validity', t => { } for (let cp = 0; cp < 0x20; cp += 1) { const ch = String.fromCodePoint(cp); - const uCode = cp.toString(16).padStart(4, '0'); + const uCode = cp.toString(16).padStart(4, '0').toUpperCase(); t.throws( () => decodePassableInternal(`~sa${ch}z`), { message: /invalid string escape/ }, @@ -296,53 +296,65 @@ test('compact string validity', t => { } }); -test('capability encoding validity constraints', t => { - const r = Remotable(); - let encoding; - const customEncode = makeEncodePassable({ +test('compact custom encoding validity constraints', t => { + const encodings = new Map(); + const dynamicEncoder = obj => encodings.get(obj); + const dynamicEncodePassable = makeEncodePassable({ format: 'compactOrdered', - encodeRemotable: _r => encoding, + encodeRemotable: dynamicEncoder, + encodePromise: dynamicEncoder, + encodeError: dynamicEncoder, }); - const tryCustomEncode = () => { - const encoded = customEncode(r); - return encoded; + const makers = { + r: Remotable, + '?': Promise.resolve.bind(Promise), + '!': Error, }; - t.throws(tryCustomEncode, { message: /must start with "r"/ }); - - encoding = '?'; - t.throws(tryCustomEncode, { message: /must start with "r"/ }); - - encoding = 'r '; - t.throws(tryCustomEncode, { message: /unexpected array element terminator/ }); - - encoding = 'r s'; - t.throws(tryCustomEncode, { message: /must be embeddable/ }); - - encoding = 'r^^'; - t.throws(tryCustomEncode, { message: /unterminated array/ }); - - encoding = 'r'; - t.notThrows(tryCustomEncode, 'empty custom encoding is acceptable'); - - encoding = `r${encodePassableInternal2(harden([]))}`; - t.notThrows( - tryCustomEncode, - 'custom encoding containing an empty array is acceptable', - ); + for (const [sigil, makeInstance] of Object.entries(makers)) { + const instance = harden(makeInstance()); + const makeTryEncode = encoding => { + encodings.set(instance, encoding); + const tryEncode = () => dynamicEncodePassable(instance); + return tryEncode; + }; + + const rMustStartWith = RegExp(`must start with "[${sigil}]"`); + t.throws(makeTryEncode(undefined), { message: rMustStartWith }); + for (const otherSigil of Object.keys(makers).filter(s => s !== sigil)) { + t.throws(makeTryEncode(otherSigil), { message: rMustStartWith }); + } - encoding = `r${encodePassableInternal2(harden(['foo', []]))}`; - t.notThrows( - tryCustomEncode, - 'custom encoding containing a non-empty array is acceptable', - ); + t.throws(makeTryEncode(`${sigil} `), { + message: /unexpected array element terminator/, + }); + t.throws(makeTryEncode(`${sigil} s`), { message: /must be embeddable/ }); + t.throws(makeTryEncode(`${sigil}^^`), { message: /unterminated array/ }); + t.notThrows(makeTryEncode(sigil), 'empty custom encoding is acceptable'); + t.notThrows( + makeTryEncode(`${sigil}!`), + 'custom encoding containing an invalid string escape is acceptable', + ); + t.notThrows( + makeTryEncode(`${sigil}${encodePassableInternal2(harden([]))}`), + 'custom encoding containing an empty array is acceptable', + ); + t.notThrows( + makeTryEncode(`${sigil}${encodePassableInternal2(harden(['foo', []]))}`), + 'custom encoding containing a non-empty array is acceptable', + ); - // TODO turn into rejection? - encoding = `r\x04!`; - t.notThrows( - tryCustomEncode, - 'custom encoding is not constrained to use string escaping', - ); + for (let cp = 0; cp < 0x20; cp += 1) { + const ch = String.fromCodePoint(cp); + const encoding = `${sigil}${ch}`; + const uCode = cp.toString(16).padStart(4, '0').toUpperCase(); + t.throws( + makeTryEncode(encoding), + { message: /must not contain a C0/ }, + `disallowed encode output: U+${uCode} ${JSON.stringify(encoding)}`, + ); + } + } }); const orderInvariants = (x, y) => { From 52a67f9763f97d8657de35888d5e570939e7a853 Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Wed, 17 Jan 2024 20:03:50 -0500 Subject: [PATCH 12/14] fixup! feat(marshal): Introduce makePassableKit and support arbitrary remotable/promise/error compactOrdered encodings --- packages/marshal/index.js | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/marshal/index.js b/packages/marshal/index.js index 3e9b3afc87..6973cf84d7 100644 --- a/packages/marshal/index.js +++ b/packages/marshal/index.js @@ -7,6 +7,7 @@ export { stringify, parse } from './src/marshal-stringify.js'; export { decodeToJustin } from './src/marshal-justin.js'; export { + makePassableKit, makeEncodePassable, makeDecodePassable, isEncodedRemotable, From 59b02aae5a87050819298bab77621cf4c7d39afa Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Thu, 25 Jan 2024 01:22:13 -0500 Subject: [PATCH 13/14] perf(marshal): Avoid even more substring operations * Introduce a short-circuiting `getSuffix` (for skip=0). * Extend "skip" to `innerDecode`, `decodeBinary64`, `decodeRecord`, and `decodeTagged`. --- packages/marshal/src/encodePassable.js | 109 +++++++++++++++---------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/packages/marshal/src/encodePassable.js b/packages/marshal/src/encodePassable.js index e4e94c84b4..289cb9276c 100644 --- a/packages/marshal/src/encodePassable.js +++ b/packages/marshal/src/encodePassable.js @@ -27,6 +27,17 @@ const { ownKeys } = Reflect; // eslint-disable-next-line no-control-regex const rC0 = /[\x00-\x1F]/; +/** + * Return the suffix of a string starting at a particular index. + * This both expresses intent and potentially avoids slow `substring` in XS. + * https://github.com/endojs/endo/issues/1984 + * + * @param {string} str + * @param {number} index + * @returns {string} + */ +const getSuffix = (str, index) => (index === 0 ? str : str.substring(index)); + /** * Assuming that `record` is a CopyRecord, we have only * string-named own properties. `recordNames` returns those name *reverse* @@ -123,19 +134,21 @@ const encodeBinary64 = n => { /** * @param {string} encoded + * @param {number} [skip] * @returns {number} */ -const decodeBinary64 = encoded => { - encoded.charAt(0) === 'f' || Fail`Encoded number expected: ${encoded}`; - let bits = BigInt(`0x${encoded.substring(1)}`); - if (encoded[1] < '8') { +const decodeBinary64 = (encoded, skip = 0) => { + encoded.charAt(skip) === 'f' || Fail`Encoded number expected: ${encoded}`; + let bits = BigInt(`0x${getSuffix(encoded, skip + 1)}`); + if (encoded.charAt(skip + 1) < '8') { bits ^= 0xffffffffffffffffn; } else { bits ^= 0x8000000000000000n; } asBits[0] = bits; const result = asNumber[0]; - !is(result, -0) || Fail`Unexpected negative zero: ${encoded}`; + !is(result, -0) || + Fail`Unexpected negative zero: ${getSuffix(encoded, skip)}`; return result; }; @@ -346,7 +359,7 @@ const decodeCompactArray = (encoded, decodePassable, skip = 0) => { const index = /** @type {number} */ (i); if (index <= skip) { if (index === skip) { - ch === '^' || Fail`Encoded array expected: ${encoded.slice(skip)}`; + ch === '^' || Fail`Encoded array expected: ${getSuffix(encoded, skip)}`; } } else if (ch === '^') { // This is the start of a nested array. @@ -375,9 +388,12 @@ const decodeCompactArray = (encoded, decodePassable, skip = 0) => { // Advance the index. nextIndex = index + 1; } - depth === 0 || Fail`unterminated array: ${encoded.slice(skip)}`; + depth === 0 || Fail`unterminated array: ${getSuffix(encoded, skip)}`; nextIndex === encoded.length || - Fail`unterminated array element: ${encoded.slice(currentElementStart)}`; + Fail`unterminated array element: ${getSuffix( + encoded, + currentElementStart, + )}`; return harden(elements); }; @@ -424,7 +440,7 @@ const decodeLegacyArray = (encoded, decodePassable, skip = 0) => { if (stillToSkip > 0) { stillToSkip -= 1; if (stillToSkip === 0) { - c === '[' || Fail`Encoded array expected: ${encoded.slice(skip)}`; + c === '[' || Fail`Encoded array expected: ${getSuffix(encoded, skip)}`; } } else if (inEscape) { c === '\u0000' || @@ -445,9 +461,9 @@ const decodeLegacyArray = (encoded, decodePassable, skip = 0) => { } inEscape = false; } - !inEscape || Fail`unexpected end of encoding ${encoded.slice(skip)}`; + !inEscape || Fail`unexpected end of encoding ${getSuffix(encoded, skip)}`; elemChars.length === 0 || - Fail`encoding terminated early: ${encoded.slice(skip)}`; + Fail`encoding terminated early: ${getSuffix(encoded, skip)}`; return harden(elements); }; @@ -457,19 +473,20 @@ const encodeRecord = (record, encodeArray, encodePassable) => { return `(${encodeArray(harden([names, values]), encodePassable)}`; }; -const decodeRecord = (encoded, decodeArray, decodePassable) => { - assert(encoded.charAt(0) === '('); +const decodeRecord = (encoded, decodeArray, decodePassable, skip = 0) => { + assert(encoded.charAt(skip) === '('); // Skip the "(" inside `decodeArray` to avoid slow `substring` in XS. // https://github.com/endojs/endo/issues/1984 - const unzippedEntries = decodeArray(encoded, decodePassable, 1); - unzippedEntries.length === 2 || Fail`expected keys,values pair: ${encoded}`; + const unzippedEntries = decodeArray(encoded, decodePassable, skip + 1); + unzippedEntries.length === 2 || + Fail`expected keys,values pair: ${getSuffix(encoded, skip)}`; const [keys, vals] = unzippedEntries; (passStyleOf(keys) === 'copyArray' && passStyleOf(vals) === 'copyArray' && keys.length === vals.length && keys.every(key => typeof key === 'string')) || - Fail`not a valid record encoding: ${encoded}`; + Fail`not a valid record encoding: ${getSuffix(encoded, skip)}`; const mapEntries = keys.map((key, i) => [key, vals[i]]); const record = harden(fromEntries(mapEntries)); assertRecord(record, 'decoded record'); @@ -479,15 +496,16 @@ const decodeRecord = (encoded, decodeArray, decodePassable) => { const encodeTagged = (tagged, encodeArray, encodePassable) => `:${encodeArray(harden([getTag(tagged), tagged.payload]), encodePassable)}`; -const decodeTagged = (encoded, decodeArray, decodePassable) => { - assert(encoded.charAt(0) === ':'); +const decodeTagged = (encoded, decodeArray, decodePassable, skip = 0) => { + assert(encoded.charAt(skip) === ':'); // Skip the ":" inside `decodeArray` to avoid slow `substring` in XS. // https://github.com/endojs/endo/issues/1984 - const taggedPayload = decodeArray(encoded, decodePassable, 1); - taggedPayload.length === 2 || Fail`expected tag,payload pair: ${encoded}`; + const taggedPayload = decodeArray(encoded, decodePassable, skip + 1); + taggedPayload.length === 2 || + Fail`expected tag,payload pair: ${getSuffix(encoded, skip)}`; const [tag, payload] = taggedPayload; passStyleOf(tag) === 'string' || - Fail`not a valid tagged encoding: ${encoded}`; + Fail`not a valid tagged encoding: ${getSuffix(encoded, skip)}`; return makeTagged(tag, payload); }; @@ -653,14 +671,14 @@ const liberalDecoders = /** @type {Required} */ ( /** * @param {(encoded: string) => string} decodeStringSuffix - * @param {(encoded: string, decodeRecur: (e: string) => Passable) => unknown[]} decodeArray + * @param {(encoded: string, decodeRecur: (e: string) => Passable, skip?: number) => unknown[]} decodeArray * @param {Required} options - * @returns {(encoded: string) => Passable} + * @returns {(encoded: string, skip?: number) => Passable} */ const makeInnerDecode = (decodeStringSuffix, decodeArray, options) => { const { decodeRemotable, decodePromise, decodeError } = options; - const innerDecode = encoded => { - switch (encoded.charAt(0)) { + const innerDecode = (encoded, skip = 0) => { + switch (encoded.charAt(skip)) { case 'v': { return null; } @@ -668,49 +686,50 @@ const makeInnerDecode = (decodeStringSuffix, decodeArray, options) => { return undefined; } case 'f': { - return decodeBinary64(encoded); + return decodeBinary64(encoded, skip); } case 's': { - return decodeStringSuffix(encoded.slice(1)); + return decodeStringSuffix(getSuffix(encoded, skip + 1)); } case 'b': { - if (encoded === 'btrue') { + const substring = getSuffix(encoded, skip + 1); + if (substring === 'true') { return true; - } else if (encoded === 'bfalse') { + } else if (substring === 'false') { return false; } - throw Fail`expected encoded boolean to be "btrue" or "bfalse": ${encoded}`; + throw Fail`expected encoded boolean to be "btrue" or "bfalse": ${substring}`; } case 'n': case 'p': { - return decodeBigInt(encoded); + return decodeBigInt(getSuffix(encoded, skip)); } case 'r': { - return decodeRemotable(encoded, innerDecode); + return decodeRemotable(getSuffix(encoded, skip), innerDecode); } case '?': { - return decodePromise(encoded, innerDecode); + return decodePromise(getSuffix(encoded, skip), innerDecode); } case '!': { - return decodeError(encoded, innerDecode); + return decodeError(getSuffix(encoded, skip), innerDecode); } case 'y': { // Strings and symbols share decoding logic. - const name = decodeStringSuffix(encoded.slice(1)); + const name = decodeStringSuffix(getSuffix(encoded, skip + 1)); return passableSymbolForName(name); } case '[': case '^': { - return decodeArray(encoded, innerDecode); + return decodeArray(encoded, innerDecode, skip); } case '(': { - return decodeRecord(encoded, decodeArray, innerDecode); + return decodeRecord(encoded, decodeArray, innerDecode, skip); } case ':': { - return decodeTagged(encoded, decodeArray, innerDecode); + return decodeTagged(encoded, decodeArray, innerDecode, skip); } default: { - throw Fail`invalid database key: ${encoded}`; + throw Fail`invalid database key: ${getSuffix(encoded, skip)}`; } } }; @@ -758,11 +777,11 @@ export const makePassableKit = (options = {}) => { Fail`${b( label, )} encoding must not contain a C0 control character: ${encoding}`; - const decoded = decodeCompactArray(`^s ${encoding} s `, liberalDecode); + const decoded = decodeCompactArray(`^v ${encoding} v `, liberalDecode); (isArray(decoded) && decoded.length === 3 && - decoded[0] === '' && - decoded[2] === '') || + decoded[0] === null && + decoded[2] === null) || Fail`${b(label)} encoding must be embeddable: ${encoding}`; }; const encodeCompact = makeInnerEncode( @@ -794,8 +813,10 @@ export const makePassableKit = (options = {}) => { ); const decodePassable = encoded => { // A leading "~" indicates the v2 encoding (with escaping in strings rather than arrays). - if (encoded.startsWith('~')) { - return decodeCompact(encoded.slice(1)); + // Skip it inside `decodeCompact` to avoid slow `substring` in XS. + // https://github.com/endojs/endo/issues/1984 + if (encoded.charAt(0) === '~') { + return decodeCompact(encoded, 1); } return decodeLegacy(encoded); }; From 70bcca3d4ba93e92221a9188f583126ca84e4e4d Mon Sep 17 00:00:00 2001 From: Richard Gibson Date: Mon, 5 Feb 2024 11:58:07 -0500 Subject: [PATCH 14/14] docs(marshal): Start documenting passable encoding --- packages/marshal/README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/packages/marshal/README.md b/packages/marshal/README.md index 92953f5c9b..f74ed097e6 100644 --- a/packages/marshal/README.md +++ b/packages/marshal/README.md @@ -41,6 +41,25 @@ console.log(o1 === o2); // false ``` +Additionally, this module exports a `makePassableKit` function for encoding into +and decoding from a directly-serialized format in which string comparison +corresponds with arbitrary value comparison (cf. +[Patterns: Rank order and key order](https://github.com/endojs/endo/blob/master/packages/patterns/README.md#rank-order-and-key-order). +Rather than accepting `convertValToSlot` and `convertSlotToVal` functions and +keeping a "slots" side table, `makePassableKit` expects +{encode,decode}{Remotable,Promise,Error} functions that directly convert between +instances of the respective pass styles and properly-formatted encodings +(in which Remotable encodings start with "r", Promise encodings start with "?", +Error encodings start with "!", and all other details are left to the provided +functions). +`makePassableKit` supports two variations of this format: "legacyOrdered" and +"compactOrdered". The former is the default for historical reasons (see +https://github.com/endojs/endo/pull/1594 for background) but the latter is +preferred for its better handling of deep structure. The ordering guarantees are +upheld within each format variation, but not across them (i.e., it is not +correct to treat a string comparison of legacyOrdered vs. compactOrdered as a +corresponding value comparison). + ## Frozen Objects Only The entire object graph must be "hardened" (recursively frozen), such as done