From 0151e84f07fff8f3eeeb6b06a03846179f6da1d6 Mon Sep 17 00:00:00 2001 From: Vincent Weevers Date: Sat, 3 Feb 2024 15:30:52 +0100 Subject: [PATCH] Allow batches to write to a nondescendant sublevel Closes #80. Follow-up for bcb4192. --- README.md | 6 +++--- UPGRADING.md | 26 +++++------------------ abstract-chained-batch.js | 25 +++++++++++++++------- abstract-level.js | 25 +++++++++++++++------- lib/prefixes.js | 13 ++++++++++-- lib/prewrite-batch.js | 22 ++++++++++++++------ test/hooks/prewrite.js | 44 +++++++++++++++++++++++++++++++++++++++ test/sublevel-test.js | 23 ++++++++++++++++++++ 8 files changed, 137 insertions(+), 47 deletions(-) diff --git a/README.md b/README.md index 949f59a..132cf02 100644 --- a/README.md +++ b/README.md @@ -316,7 +316,7 @@ Perform multiple _put_ and/or _del_ operations in bulk. Returns a promise. The ` Each operation must be an object with at least a `type` property set to either `'put'` or `'del'`. If the `type` is `'put'`, the operation must have `key` and `value` properties. It may optionally have `keyEncoding` and / or `valueEncoding` properties to encode keys or values with a custom encoding for just that operation. If the `type` is `'del'`, the operation must have a `key` property and may optionally have a `keyEncoding` property. -An operation of either type may also have a `sublevel` property, to prefix the key of the operation with the prefix of that sublevel. This allows atomically committing data to multiple sublevels. The given `sublevel` must be a descendant of `db`. Keys and values will be encoded by the sublevel, to the same effect as a `sublevel.batch(..)` call. In the following example, the first `value` will be encoded with `'json'` rather than the default encoding of `db`: +An operation of either type may also have a `sublevel` property, to prefix the key of the operation with the prefix of that sublevel. This allows atomically committing data to multiple sublevels. The given `sublevel` must have the same _root_ (i.e. top-most) database as `db`. Keys and values will be encoded by the sublevel, to the same effect as a `sublevel.batch(..)` call. In the following example, the first `value` will be encoded with `'json'` rather than the default encoding of `db`: ```js const people = db.sublevel('people', { valueEncoding: 'json' }) @@ -579,14 +579,14 @@ Add a `put` operation to this chained batch, not committed until `write()` is ca - `keyEncoding`: custom key encoding for this operation, used to encode the `key`. - `valueEncoding`: custom value encoding for this operation, used to encode the `value`. -- `sublevel` (sublevel instance): act as though the `put` operation is performed on the given sublevel, to similar effect as `sublevel.batch().put(key, value)`. This allows atomically committing data to multiple sublevels. The given `sublevel` must be a descendant of `db`. The `key` will be prefixed with the prefix of the sublevel, and the `key` and `value` will be encoded by the sublevel (using the default encodings of the sublevel unless `keyEncoding` and / or `valueEncoding` are provided). +- `sublevel` (sublevel instance): act as though the `put` operation is performed on the given sublevel, to similar effect as `sublevel.batch().put(key, value)`. This allows atomically committing data to multiple sublevels. The given `sublevel` must have the same _root_ (i.e. top-most) database as `chainedBatch.db`. The `key` will be prefixed with the prefix of the sublevel, and the `key` and `value` will be encoded by the sublevel (using the default encodings of the sublevel unless `keyEncoding` and / or `valueEncoding` are provided). #### `chainedBatch.del(key[, options])` Add a `del` operation to this chained batch, not committed until `write()` is called. This will throw a [`LEVEL_INVALID_KEY`](#errors) error if `key` is invalid. The optional `options` object may contain: - `keyEncoding`: custom key encoding for this operation, used to encode the `key`. -- `sublevel` (sublevel instance): act as though the `del` operation is performed on the given sublevel, to similar effect as `sublevel.batch().del(key)`. This allows atomically committing data to multiple sublevels. The given `sublevel` must be a descendant of `db`. The `key` will be prefixed with the prefix of the sublevel, and the `key` will be encoded by the sublevel (using the default key encoding of the sublevel unless `keyEncoding` is provided). +- `sublevel` (sublevel instance): act as though the `del` operation is performed on the given sublevel, to similar effect as `sublevel.batch().del(key)`. This allows atomically committing data to multiple sublevels. The given `sublevel` must have the same _root_ (i.e. top-most) database as `chainedBatch.db`. The `key` will be prefixed with the prefix of the sublevel, and the `key` will be encoded by the sublevel (using the default key encoding of the sublevel unless `keyEncoding` is provided). #### `chainedBatch.clear()` diff --git a/UPGRADING.md b/UPGRADING.md index f68d2dc..ddd2930 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -11,7 +11,7 @@ This document describes breaking changes and how to upgrade. For a complete list - [1.1. Callbacks have been removed](#11-callbacks-have-been-removed) - [1.2. Not found](#12-not-found) - [1.3. Not ready](#13-not-ready) - - [1.4. Hooks](#14-hooks) + - [1.4. Slower nested sublevels](#14-slower-nested-sublevels) - [1.5. Open before creating a chained batch](#15-open-before-creating-a-chained-batch) - [2. Private API](#2-private-api) - [2.1. Promises all the way](#21-promises-all-the-way) @@ -136,39 +136,23 @@ Or simply: await db.get('example') ``` -#### 1.4. Hooks +#### 1.4. Slower nested sublevels -This release adds [hooks](./README.md#hooks). To achieve this feature, two low-impact breaking changes have been made to nested sublevels. Nested sublevels, no matter their depth, were previously all connected to the same parent database rather than forming a tree. In the following example, the `colorIndex` sublevel would previously forward its operations directly to `db`: +The internals of nested sublevels have been refactored for the benefit of [hooks](./README.md#hooks). Nested sublevels, no matter their depth, were previously all connected to the same parent database rather than forming a tree. In the following example, the `colorIndex` sublevel would previously forward its operations directly to `db`: ```js const indexes = db.sublevel('idx') const colorIndex = indexes.sublevel('colors') ``` -It will now forward its operations to `indexes`, which in turn forwards them to `db`. At each step, hooks and events are available to transform and react to data from a different perspective. Which comes at a (typically small) performance cost that increases with further nested sublevels. This decreased performance is the first breaking change and mainly affects sublevels nested at a depth of more than 2. +It will now forward its operations to `indexes`, which in turn forwards them to `db`. At each step, hooks and events are available to transform and react to data from a different perspective. Which comes at a (typically small) performance cost that increases with further nested sublevels. -To optionally negate it, a new feature has been added to `db.sublevel(name)`: it now also accepts a `name` that is an array. If the `indexes` sublevel is only used to organize keys and not directly interfaced with, operations on `colorIndex` can be made faster by skipping `indexes`: +To optionally negate that cost, a new feature has been added to `db.sublevel(name)`: it now also accepts a `name` that is an array. If the `indexes` sublevel is only used to organize keys and not directly interfaced with, operations on `colorIndex` can be made faster by skipping `indexes`: ```js const colorIndex = db.sublevel(['idx', 'colors']) ``` -The second breaking change is that if a `sublevel` is provided as an option to `db.batch()`, that sublevel must now be a descendant of `db`: - -```js -const colorIndex = indexes.sublevel('colors') -const flavorIndex = indexes.sublevel('flavors') - -// No longer works because colorIndex isn't a descendant of flavorIndex -flavorIndex.batch([{ type: 'del', key: 'blue', sublevel: colorIndex }]) - -// OK -indexes.batch([{ type: 'del', key: 'blue', sublevel: colorIndex }]) - -// OK -db.batch([{ type: 'del', key: 'blue', sublevel: colorIndex }]) -``` - #### 1.5. Open before creating a chained batch It is no longer possible to create a chained batch while the database is opening. If you previously did: diff --git a/abstract-chained-batch.js b/abstract-chained-batch.js index 17452b3..415af16 100644 --- a/abstract-chained-batch.js +++ b/abstract-chained-batch.js @@ -3,7 +3,7 @@ const combineErrors = require('maybe-combine-errors') const ModuleError = require('module-error') const { getOptions, emptyOptions, noop } = require('./lib/common') -const { prefixDescendantKey } = require('./lib/prefixes') +const { prefixDescendantKey, isDescendant } = require('./lib/prefixes') const { PrewriteBatch } = require('./lib/prewrite-batch') const kStatus = Symbol('status') @@ -116,15 +116,25 @@ class AbstractChainedBatch { const keyEncoding = op.keyEncoding const preencodedKey = keyEncoding.encode(op.key) const keyFormat = keyEncoding.format - const encodedKey = delegated ? prefixDescendantKey(preencodedKey, keyFormat, db, this.db) : preencodedKey + + // If the sublevel is not a descendant then forward that option to the parent db + // so that we don't erroneously add our own prefix to the key of the operation. + const siblings = delegated && !isDescendant(op.sublevel, this.db) && op.sublevel !== this.db + const encodedKey = delegated && !siblings + ? prefixDescendantKey(preencodedKey, keyFormat, db, this.db) + : preencodedKey + const valueEncoding = op.valueEncoding const encodedValue = valueEncoding.encode(op.value) const valueFormat = valueEncoding.format - // Prevent double prefixing - if (delegated) op.sublevel = null + // Only prefix once + if (delegated && !siblings) { + op.sublevel = null + } - if (this[kPublicOperations] !== null) { + // If the sublevel is not a descendant then we shouldn't emit events + if (this[kPublicOperations] !== null && !siblings) { // Clone op before we mutate it for the private API const publicOperation = Object.assign({}, op) publicOperation.encodedKey = encodedKey @@ -139,7 +149,7 @@ class AbstractChainedBatch { } this[kPublicOperations].push(publicOperation) - } else if (this[kLegacyOperations] !== null) { + } else if (this[kLegacyOperations] !== null && !siblings) { const legacyOperation = Object.assign({}, original) legacyOperation.type = 'put' @@ -149,7 +159,8 @@ class AbstractChainedBatch { this[kLegacyOperations].push(legacyOperation) } - op.key = this.db.prefixKey(encodedKey, keyFormat, true) + // If we're forwarding the sublevel option then don't prefix the key yet + op.key = siblings ? encodedKey : this.db.prefixKey(encodedKey, keyFormat, true) op.value = encodedValue op.keyEncoding = keyFormat op.valueEncoding = valueFormat diff --git a/abstract-level.js b/abstract-level.js index 141bd7a..8703e5c 100644 --- a/abstract-level.js +++ b/abstract-level.js @@ -13,7 +13,7 @@ const { DatabaseHooks } = require('./lib/hooks') const { PrewriteBatch } = require('./lib/prewrite-batch') const { EventMonitor } = require('./lib/event-monitor') const { getOptions, noop, emptyOptions, resolvedPromise } = require('./lib/common') -const { prefixDescendantKey } = require('./lib/prefixes') +const { prefixDescendantKey, isDescendant } = require('./lib/prefixes') const { DeferredQueue } = require('./lib/deferred-queue') const rangeOptions = require('./lib/range-options') @@ -603,18 +603,26 @@ class AbstractLevel extends EventEmitter { } // Encode data for private API - // TODO: benchmark a try/catch around this const keyEncoding = op.keyEncoding const preencodedKey = keyEncoding.encode(op.key) const keyFormat = keyEncoding.format - const encodedKey = delegated ? prefixDescendantKey(preencodedKey, keyFormat, db, this) : preencodedKey - // Prevent double prefixing - if (delegated) op.sublevel = null + // If the sublevel is not a descendant then forward that option to the parent db + // so that we don't erroneously add our own prefix to the key of the operation. + const siblings = delegated && !isDescendant(op.sublevel, this) && op.sublevel !== this + const encodedKey = delegated && !siblings + ? prefixDescendantKey(preencodedKey, keyFormat, db, this) + : preencodedKey + + // Only prefix once + if (delegated && !siblings) { + op.sublevel = null + } let publicOperation = null - if (enableWriteEvent) { + // If the sublevel is not a descendant then we shouldn't emit events + if (enableWriteEvent && !siblings) { // Clone op before we mutate it for the private API // TODO (future semver-major): consider sending this shape to private API too publicOperation = Object.assign({}, op) @@ -629,7 +637,8 @@ class AbstractLevel extends EventEmitter { publicOperations[i] = publicOperation } - op.key = this.prefixKey(encodedKey, keyFormat, true) + // If we're forwarding the sublevel option then don't prefix the key yet + op.key = siblings ? encodedKey : this.prefixKey(encodedKey, keyFormat, true) op.keyEncoding = keyFormat if (isPut) { @@ -640,7 +649,7 @@ class AbstractLevel extends EventEmitter { op.value = encodedValue op.valueEncoding = valueFormat - if (enableWriteEvent) { + if (enableWriteEvent && !siblings) { publicOperation.encodedValue = encodedValue if (delegated) { diff --git a/lib/prefixes.js b/lib/prefixes.js index 8fa43f7..13984f8 100644 --- a/lib/prefixes.js +++ b/lib/prefixes.js @@ -1,8 +1,6 @@ 'use strict' exports.prefixDescendantKey = function (key, keyFormat, descendant, ancestor) { - // TODO: optimize - // TODO: throw when ancestor is not descendant's ancestor? while (descendant !== null && descendant !== ancestor) { key = descendant.prefixKey(key, keyFormat, true) descendant = descendant.parent @@ -10,3 +8,14 @@ exports.prefixDescendantKey = function (key, keyFormat, descendant, ancestor) { return key } + +// Check if db is a descendant of ancestor +// TODO: optimize, when used alongside prefixDescendantKey +// which means we visit parents twice. +exports.isDescendant = function (db, ancestor) { + while (true) { + if (db.parent == null) return false + if (db.parent === ancestor) return true + db = db.parent + } +} diff --git a/lib/prewrite-batch.js b/lib/prewrite-batch.js index 452e27f..f71a114 100644 --- a/lib/prewrite-batch.js +++ b/lib/prewrite-batch.js @@ -1,6 +1,6 @@ 'use strict' -const { prefixDescendantKey } = require('./prefixes') +const { prefixDescendantKey, isDescendant } = require('./prefixes') const kDb = Symbol('db') const kPrivateOperations = Symbol('privateOperations') @@ -40,14 +40,23 @@ class PrewriteBatch { const keyEncoding = op.keyEncoding const preencodedKey = keyEncoding.encode(op.key) const keyFormat = keyEncoding.format - const encodedKey = delegated ? prefixDescendantKey(preencodedKey, keyFormat, db, this[kDb]) : preencodedKey - // Prevent double prefixing - if (delegated) op.sublevel = null + // If the sublevel is not a descendant then forward that option to the parent db + // so that we don't erroneously add our own prefix to the key of the operation. + const siblings = delegated && !isDescendant(op.sublevel, this[kDb]) && op.sublevel !== this[kDb] + const encodedKey = delegated && !siblings + ? prefixDescendantKey(preencodedKey, keyFormat, db, this[kDb]) + : preencodedKey + + // Only prefix once + if (delegated && !siblings) { + op.sublevel = null + } let publicOperation = null - if (this[kPublicOperations] !== null) { + // If the sublevel is not a descendant then we shouldn't emit events + if (this[kPublicOperations] !== null && !siblings) { // Clone op before we mutate it for the private API publicOperation = Object.assign({}, op) publicOperation.encodedKey = encodedKey @@ -61,7 +70,8 @@ class PrewriteBatch { this[kPublicOperations].push(publicOperation) } - op.key = this[kDb].prefixKey(encodedKey, keyFormat, true) + // If we're forwarding the sublevel option then don't prefix the key yet + op.key = siblings ? encodedKey : this[kDb].prefixKey(encodedKey, keyFormat, true) op.keyEncoding = keyFormat if (isPut) { diff --git a/test/hooks/prewrite.js b/test/hooks/prewrite.js index 5b30e29..a7b0efa 100644 --- a/test/hooks/prewrite.js +++ b/test/hooks/prewrite.js @@ -761,4 +761,48 @@ module.exports = function (test, testCommon) { await Promise.all([batchBefore.close(), batchAfter.close()]) return db.close() }) + + // See https://github.com/Level/abstract-level/issues/80 + test('prewrite hook function can write to nondescendant sublevel', async function (t) { + t.plan(2) + + const db = testCommon.factory() + await db.open() + + const books = db.sublevel('books', { valueEncoding: 'json' }) + const index = db.sublevel('authors', { + // Use JSON, which normally doesn't make sense for keys but + // helps to assert that there's no double encoding happening. + keyEncoding: 'json' + }) + + db.on('write', (ops) => { + // Check that data is written to correct sublevels, specifically + // !authors!Hesse~12 rather than !books!!authors!Hesse~12. + t.same(ops.map(x => x.key), ['!books!12', '!authors!"Hesse~12"']) + }) + + books.on('write', (ops) => { + // Should not include the op of the index + t.same(ops.map(x => x.key), ['12']) + }) + + index.on('write', (ops) => { + t.fail('Did not expect an event on index') + }) + + books.hooks.prewrite.add(function (op, batch) { + if (op.type === 'put') { + batch.add({ + type: 'put', + // Key structure is synthetic and not relevant to the test + key: op.value.author + '~' + op.key, + value: '', + sublevel: index + }) + } + }) + + await books.put('12', { title: 'Siddhartha', author: 'Hesse' }) + }) } diff --git a/test/sublevel-test.js b/test/sublevel-test.js index a925c31..3457441 100644 --- a/test/sublevel-test.js +++ b/test/sublevel-test.js @@ -56,6 +56,8 @@ exports.all = function (test, testCommon) { const b = a.sublevel('b') const c = b.sublevel('c') + await Promise.all([a.open(), b.open(), c.open()]) + // Note: may return a transcoder encoding const utf8 = db.keyEncoding('utf8') @@ -120,6 +122,27 @@ exports.all = function (test, testCommon) { t.same(await db.keys().all(), [], 'db has no entries') return db.close() }) + + // See https://github.com/Level/abstract-level/issues/80 + test(`${method} with nondescendant sublevel option`, async function (t) { + const db = testCommon.factory() + await db.open() + + const a = db.sublevel('a') + const b = db.sublevel('b') + + await Promise.all([a.open(), b.open()]) + + // The b sublevel is not a descendant of a, so the sublevel option + // has to be forwarded to db so that the key gets the correct prefix. + if (method === 'batch') { + await a.batch([{ type: 'put', key: 'k', value: 'v', sublevel: b }]) + } else { + await a.batch().put('k', 'v', { sublevel: b }).write() + } + + t.same(await db.keys().all(), ['!b!k'], 'written to sublevel b') + }) } for (const deferred of [false, true]) {