Skip to content

Commit

Permalink
Zero-cost version-vector database upgrade (#1895)
Browse files Browse the repository at this point in the history
* Update CMake CMAKE_OSX_DEPLOYMENT_TARGET to 10.14

It's been updated to 10.14 in Xcode; CMake should be consistent.

* Upgrade docs to version vectors lazily, on 1st write

Eliminates the cost of upgrading an existing db to version vectors.
Instead of converting all docs to version-vector format the first
time the db is opened, it leaves them alone.

A C4Document is opened using its existing versioning type, unless
the new document flag kDocGetUpgraded is used. (Even that flag
doesn't change the representation on disk.)

A C4Document is upgraded on disk when it's saved.

The replicator loads docs using kDocGetUpgraded since it uses
version vectors.
  • Loading branch information
snej authored Oct 27, 2023
1 parent 733f9e9 commit 582d690
Show file tree
Hide file tree
Showing 24 changed files with 612 additions and 254 deletions.
1 change: 1 addition & 0 deletions C/include/c4DocumentTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ typedef C4_ENUM(uint8_t, C4DocContentLevel){
kDocGetMetadata, ///< Only get revID and flags
kDocGetCurrentRev, ///< Get current revision body but not other revisions/remotes
kDocGetAll, ///< Get everything
kDocGetUpgraded, ///< Get everything, upgrade to latest format (version vectors)
}; // Note: Same as litecore::ContentOption

// Ignore warning about not initializing members, it must be this way to be C-compatible
Expand Down
237 changes: 160 additions & 77 deletions C/tests/c4DatabaseTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1324,111 +1324,194 @@ N_WAY_TEST_CASE_METHOD(C4DatabaseTest, "Database Upgrade To Version Vectors", "[
// Initially populate a v3 rev-tree based database:
TransactionHelper t(db);
createNumberedDocs(5);
// Add a deleted doc to make sure it's skipped by default:
// "doc-DEL" is deleted; make sure it's skipped by default:
createRev(c4str("doc-DEL"), kRevID, kC4SliceNull, kRevDeleted);

// Add a 2nd revision to doc 1:
createFleeceRev(db, "doc-001"_sl, nullslice, slice(json5("{doc:'one',rev:'two'}")));
// "doc-001": Add a 2nd revision:
createFleeceRev(db, "doc-001"_sl, kRev2ID, slice(json5("{doc:'one',rev:'two'}")));

// Add a 2nd rev, synced with remote, to doc 2:
// "doc-002": Add a 3rd-gen rev, synced with remote:
createFleeceRev(db, "doc-002"_sl, kRev2ID, slice(json5("{doc:'two',rev:'two'}")));
createFleeceRev(db, "doc-002"_sl, kRev3ID, slice(json5("{doc:'two',rev:'three'}")));
setRemoteRev(db, "doc-002"_sl, kRev3ID, 1);

// Add a 2nd rev, and rev 1 synced with remote, to doc 3:
// "doc-003": Add a 2nd rev synced with remote, and a 3rd local rev:
createFleeceRev(db, "doc-003"_sl, kRev2ID, slice(json5("{doc:'three',rev:'two'}")), kRevKeepBody);
setRemoteRev(db, "doc-003"_sl, kRev2ID, 1);
createFleeceRev(db, "doc-003"_sl, kRev3ID, slice(json5("{doc:'three',rev:'three'}")));

// Add a conflict to doc 4:
// "doc-004": Current rev conflicts with the remote:
createFleeceRev(db, "doc-004"_sl, kRev2ID, slice(json5("{doc:'four',rev:'two'}")));
createFleeceRev(db, "doc-004"_sl, kRev3ID, slice(json5("{doc:'four',rev:'three'}")));
createConflictingRev(db, "doc-004"_sl, kRev2ID, "3-cc"_sl);
setRemoteRev(db, "doc-004"_sl, "3-cc"_sl, 1);
}

// Reopen database, upgrading to version vectors:
// Reopen database, enabling version vectors:
C4DatabaseConfig2 config = dbConfig();
config.flags |= kC4DB_VersionVectors | kC4DB_FakeVectorClock;
config.flags |= kC4DB_VersionVectors;
closeDB();
C4Log("---- Reopening db with version vectors ---");
db = c4db_openNamed(kDatabaseName, &config, ERROR_INFO());
REQUIRE(db);

// Note: The revID/version checks below hardcode the base timestamp used for upgrading legacy
// replicated revIDs. It's currently 0x1770000000000000 (see HybridClock.hh). If that value
// changes, or the scheme for converting rev-tree revIDs to versions changes, the values below
// need to change too.
REQUIRE(uint64_t(litecore::kMinValidTime) == 0x1770000000000000);
auto defaultColl = c4db_getDefaultCollection(db, nullptr);

// Check doc 1:
C4Document* doc;
auto defaultColl = c4db_getDefaultCollection(db, nullptr);
doc = c4coll_getDoc(defaultColl, "doc-001"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "1@*");
alloc_slice versionVector(c4doc_getRevisionHistory(doc, 0, nullptr, 0));
CHECK(versionVector == "1@*");
CHECK(doc->sequence == 7);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"one","rev":"two"})");
c4doc_release(doc);
SECTION("Read-Only") {
// Check doc 1:
C4Document* doc;
doc = c4coll_getDoc(defaultColl, "doc-001"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "2-c001d00d");
alloc_slice history(c4doc_getRevisionHistory(doc, 0, nullptr, 0));
CHECK(history == "2-c001d00d");
CHECK(doc->sequence == 7);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"one","rev":"two"})");
c4doc_release(doc);

// Check doc 2:
doc = c4coll_getDoc(defaultColl, "doc-002"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(c4rev_getTimestamp(doc->revID) == uint64_t(litecore::kMinValidTime) + 3);
CHECK(slice(doc->revID) == "1770000000000003@?"); // 0x1770000000000003 = kMinValidTime + 3
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "1770000000000003@?");
CHECK(doc->sequence == 9);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"two","rev":"three"})");
alloc_slice remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "1770000000000003@?");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"two","rev":"three"})");
c4doc_release(doc);
// Check doc 2:
doc = c4coll_getDoc(defaultColl, "doc-002"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "3-deadbeef");
history = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(history == "3-deadbeef");
CHECK(doc->sequence == 9);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"two","rev":"three"})");
alloc_slice remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "3-deadbeef");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"two","rev":"three"})");
c4doc_release(doc);

// Check doc 3:
doc = c4coll_getDoc(defaultColl, "doc-003"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "2@*");
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "2@*; 1770000000000002@?");
CHECK(doc->sequence == 11);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"three","rev":"three"})");
remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "2@?");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"three","rev":"two"})");
c4doc_release(doc);
// Check doc 3:
doc = c4coll_getDoc(defaultColl, "doc-003"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "3-deadbeef");
history = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(history == "3-deadbeef, 2-c001d00d");
CHECK(doc->sequence == 11);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"three","rev":"three"})");
remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "2-c001d00d");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"three","rev":"two"})");
c4doc_release(doc);

// Check doc 4:
doc = c4coll_getDoc(defaultColl, "doc-004"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "3@*");
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "3@*; 1770000000000002@?");
CHECK(doc->sequence == 14);
CHECK(doc->flags == (kDocConflicted | kDocExists));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"four","rev":"three"})");
remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "3@?");
REQUIRE(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "3@?");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"ans*wer":42})");
c4doc_release(doc);
// Check doc 4:
doc = c4coll_getDoc(defaultColl, "doc-004"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "3-deadbeef");
history = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(history == "3-deadbeef"); // parent 2-c001d00d doesn't show up bc it isn't a remote
CHECK(doc->sequence == 14);
CHECK(doc->flags == (kDocConflicted | kDocExists));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"four","rev":"three"})");
remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "3-cc");
REQUIRE(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
history = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(history == "3-cc");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"ans*wer":42})");
c4doc_release(doc);

// Check deleted doc:
doc = c4coll_getDoc(defaultColl, "doc-DEL"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "5@*");
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "5@*");
CHECK(doc->sequence == 6);
CHECK(doc->flags == (kDocDeleted | kDocExists));
c4doc_release(doc);
// Check deleted doc:
doc = c4coll_getDoc(defaultColl, "doc-DEL"_sl, true, kDocGetAll, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "1-abcd");
history = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(history == "1-abcd");
CHECK(doc->sequence == 6);
CHECK(doc->flags == (kDocDeleted | kDocExists));
c4doc_release(doc);
}

SECTION("Upgrading") {
// Note: The revID/version checks below hardcode the base timestamp used for upgrading legacy
// replicated revIDs. It's currently 0x1770000000000000 (see HybridClock.hh). If that value
// changes, or the scheme for converting rev-tree revIDs to versions changes, the values below
// need to change too.
REQUIRE(uint64_t(litecore::kMinValidTime) == 0x1770000000000000);

// Check doc 1:
C4Document* doc;
doc = c4coll_getDoc(defaultColl, "doc-001"_sl, true, kDocGetUpgraded, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "1770000000000002@*");
alloc_slice versionVector(c4doc_getRevisionHistory(doc, 0, nullptr, 0));
CHECK(versionVector == "1770000000000002@*");
CHECK(doc->sequence == 7);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"one","rev":"two"})");
c4doc_release(doc);

// Check doc 2:
doc = c4coll_getDoc(defaultColl, "doc-002"_sl, true, kDocGetUpgraded, ERROR_INFO());
REQUIRE(doc);
CHECK(c4rev_getTimestamp(doc->revID) == uint64_t(litecore::kMinValidTime) + 3);
CHECK(slice(doc->revID) == "1770000000000003@?"); // 0x1770000000000003 = kMinValidTime + 3
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "1770000000000003@?");
CHECK(doc->sequence == 9);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"two","rev":"three"})");
alloc_slice remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "1770000000000003@?");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"two","rev":"three"})");

// update & save it:
{
TransactionHelper t(db);
C4Document* newDoc = c4doc_update(doc, kFleeceBody, 0, ERROR_INFO());
REQUIRE(newDoc);
CHECK(slice(newDoc->revID) > "178532e35bcd0001@*");
alloc_slice newVersionVector(c4doc_getRevisionHistory(newDoc, 0, nullptr, 0));
CHECK(newVersionVector.hasSuffix("@*; 1770000000000003@?"));
c4doc_release(newDoc);
}
c4doc_release(doc);

// Check doc 3:
doc = c4coll_getDoc(defaultColl, "doc-003"_sl, true, kDocGetUpgraded, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "1770000000000003@*");
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "1770000000000003@*; 1770000000000002@?");
CHECK(doc->sequence == 11);
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"three","rev":"three"})");
remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "1770000000000002@?");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"three","rev":"two"})");
c4doc_release(doc);

// Check doc 4:
doc = c4coll_getDoc(defaultColl, "doc-004"_sl, true, kDocGetUpgraded, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "1770000000000003@*");
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "1770000000000003@*");
CHECK(doc->sequence == 14);
CHECK(doc->flags == (kDocConflicted | kDocExists));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"doc":"four","rev":"three"})");
remoteVers = c4doc_getRemoteAncestor(doc, 1);
CHECK(remoteVers == "1770000000000003@?");
REQUIRE(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "1770000000000003@?");
CHECK(c4doc_selectRevision(doc, remoteVers, true, WITH_ERROR()));
CHECK(Dict(c4doc_getProperties(doc)).toJSONString() == R"({"ans*wer":42})");
c4doc_release(doc);

// Check deleted doc:
doc = c4coll_getDoc(defaultColl, "doc-DEL"_sl, true, kDocGetUpgraded, ERROR_INFO());
REQUIRE(doc);
CHECK(slice(doc->revID) == "1770000000000001@*");
versionVector = c4doc_getRevisionHistory(doc, 0, nullptr, 0);
CHECK(versionVector == "1770000000000001@*");
CHECK(doc->sequence == 6);
CHECK(doc->flags == (kDocDeleted | kDocExists));
c4doc_release(doc);
}
}

// CBL-3706: Previously, calling these functions after deleting the default collection causes
Expand Down
13 changes: 7 additions & 6 deletions C/tests/c4Test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -422,12 +422,13 @@ string C4Test::createNewRev(C4Collection* coll, C4Slice docID, C4Slice curRevID,
rq.save = true;
C4Error error;
auto doc = c4coll_putDoc(coll, &rq, nullptr, &error);
//if (!doc) {
//char buf[256];
//INFO("Error: " << c4error_getDescriptionC(error, buf, sizeof(buf)));
//}
//REQUIRE(doc != nullptr); // can't use Catch on bg threads
C4Assert(doc != nullptr);
if ( !doc ) {
// can't use Catch (CHECK, REQUIRE) on bg threads
alloc_slice bt = c4error_getBacktrace(error);
if ( bt ) C4Log("Error backtrace:\n%.*s", FMTSLICE(bt));
char buf[256];
C4Assert(doc != nullptr, c4error_getDescriptionC(error, buf, sizeof(buf)));
}
string revID((char*)doc->revID.buf, doc->revID.size);
c4doc_release(doc);
return revID;
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ cmake_policy(VERSION 3.9)
cmake_policy(SET CMP0057 NEW)

# Mac/Apple setup -- must appear before the first "project()" line
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.12")
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
if(NOT DEFINED CMAKE_OSX_SYSROOT)
# Tells Mac builds to use the current SDK's headers & libs, not what's in the OS.
set(CMAKE_OSX_SYSROOT macosx)
Expand Down
17 changes: 15 additions & 2 deletions LiteCore/Database/DatabaseImpl+Upgrade.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,15 @@
#include <utility>
#include <vector>

#define ENABLE_VV_UPGRADE 0

namespace litecore {
using namespace std;

static constexpr const char* kNameOfVersioning[3] = {"v2.x rev-trees", "v3.x rev-trees", "version vectors"};


#if ENABLE_VV_UPGRADE
static void upgradeToVersionVectors(DatabaseImpl*, const Record&, RevTreeRecord&, ExclusiveTransaction&);
static pair<alloc_slice, alloc_slice> upgradeRemoteRevs(DatabaseImpl*, Record, RevTreeRecord&,
alloc_slice currentVersion);
Expand All @@ -42,12 +45,19 @@ namespace litecore {
}
return nullptr;
}
#endif

void DatabaseImpl::upgradeDocumentVersioning(C4DocumentVersioning curVersioning, C4DocumentVersioning newVersioning,
ExclusiveTransaction& t) {
#if !ENABLE_VV_UPGRADE
// Don't upgrade to version vectors; let it happen lazily.
if ( newVersioning == kC4VectorVersioning ) newVersioning = kC4TreeVersioning;
#endif

if ( newVersioning == curVersioning ) return;
if ( newVersioning < curVersioning )
error::_throw(error::Unimplemented, "Cannot downgrade document versioning");

if ( _config.flags & (kC4DB_ReadOnly | kC4DB_NoUpgrade) )
error::_throw(error::CantUpgradeDatabase, "Document versioning needs upgrade");

Expand Down Expand Up @@ -82,8 +92,10 @@ namespace litecore {
if ( isVersionVector(rec) ) continue;
RevTreeRecord revTree(defaultKeyStore(), rec);
if ( newVersioning == kC4VectorVersioning ) {
#if ENABLE_VV_UPGRADE
// Upgrade from rev-trees (v2 or v3) to version-vectors:
upgradeToVersionVectors(this, rec, revTree, t);
#endif
} else {
// Upgrading v2 rev-trees to new db schema with `extra` column;
// simply resave and RevTreeRecord will use the new schema:
Expand All @@ -100,6 +112,7 @@ namespace litecore {
LogTo(DBLog, "\t%" PRIu64 " documents upgraded, now committing changes...", docCount);
}

#if ENABLE_VV_UPGRADE
// Upgrades a Record from rev-trees to version vectors.
static void upgradeToVersionVectors(DatabaseImpl* db, const Record& rec, RevTreeRecord& revTree,
ExclusiveTransaction& t) {
Expand Down Expand Up @@ -157,7 +170,7 @@ namespace litecore {
rec.setExtra(nullslice);

// Instantiate a VectorRecord for this document, without reading the database:
VectorRecord nuDoc(db->defaultKeyStore(), Versioning::RevTrees, rec);
VectorRecord nuDoc(db->defaultKeyStore(), rec);
nuDoc.setEncoder(db->sharedFleeceEncoder());

// Add each remote revision:
Expand Down Expand Up @@ -185,6 +198,6 @@ namespace litecore {

return nuDoc.encodeBodyAndExtra();
}

#endif // ENABLE_VV_UPGRADE

} // namespace litecore
Loading

0 comments on commit 582d690

Please sign in to comment.