Skip to content

Commit

Permalink
merge bitcoin#24858: incorrect blk file size calculation during reind…
Browse files Browse the repository at this point in the history
…ex results in recoverable blk file corruption
  • Loading branch information
kwvg committed Oct 15, 2024
1 parent 9e75b99 commit b04b71a
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/Makefile.test.include
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ BITCOIN_TESTS =\
test/blockencodings_tests.cpp \
test/blockfilter_tests.cpp \
test/blockfilter_index_tests.cpp \
test/blockmanager_tests.cpp \
test/bloom_tests.cpp \
test/bls_tests.cpp \
test/bswap_tests.cpp \
Expand Down
13 changes: 9 additions & 4 deletions src/node/blockstorage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -790,19 +790,24 @@ bool ReadBlockFromDisk(CBlock& block, const CBlockIndex* pindex, const Consensus
return true;
}

/** Store block on disk. If dbp is non-nullptr, the file is known to already reside on disk */
FlatFilePos BlockManager::SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp)
{
unsigned int nBlockSize = ::GetSerializeSize(block, CLIENT_VERSION);
FlatFilePos blockPos;
if (dbp != nullptr) {
const auto position_known {dbp != nullptr};
if (position_known) {
blockPos = *dbp;
} else {
// when known, blockPos.nPos points at the offset of the block data in the blk file. that already accounts for
// the serialization header present in the file (the 4 magic message start bytes + the 4 length bytes = 8 bytes = BLOCK_SERIALIZATION_HEADER_SIZE).
// we add BLOCK_SERIALIZATION_HEADER_SIZE only for new blocks since they will have the serialization header added when written to disk.
nBlockSize += static_cast<unsigned int>(BLOCK_SERIALIZATION_HEADER_SIZE);
}
if (!FindBlockPos(blockPos, nBlockSize + 8, nHeight, active_chain, block.GetBlockTime(), dbp != nullptr)) {
if (!FindBlockPos(blockPos, nBlockSize, nHeight, active_chain, block.GetBlockTime(), position_known)) {
error("%s: FindBlockPos failed", __func__);
return FlatFilePos();
}
if (dbp == nullptr) {
if (!position_known) {
if (!WriteBlockToDisk(block, blockPos, chainparams.MessageStart())) {
AbortNode("Failed to write block");
return FlatFilePos();
Expand Down
4 changes: 4 additions & 0 deletions src/node/blockstorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ static const unsigned int UNDOFILE_CHUNK_SIZE = 0x100000; // 1 MiB
/** The maximum size of a blk?????.dat file (since 0.8) */
static const unsigned int MAX_BLOCKFILE_SIZE = 0x8000000; // 128 MiB

/** Size of header written by WriteBlockToDisk before a serialized CBlock */
static constexpr size_t BLOCK_SERIALIZATION_HEADER_SIZE = CMessageHeader::MESSAGE_START_SIZE + sizeof(unsigned int);

extern std::atomic_bool fImporting;
extern std::atomic_bool fReindex;
/** Pruning-related variables and constants */
Expand Down Expand Up @@ -185,6 +188,7 @@ class BlockManager
bool WriteUndoDataForBlock(const CBlockUndo& blockundo, BlockValidationState& state, CBlockIndex* pindex, const CChainParams& chainparams)
EXCLUSIVE_LOCKS_REQUIRED(::cs_main);

/** Store block on disk. If dbp is not nullptr, then it provides the known position of the block within a block file on disk. */
FlatFilePos SaveBlockToDisk(const CBlock& block, int nHeight, CChain& active_chain, const CChainParams& chainparams, const FlatFilePos* dbp);

/** Calculate the amount of disk space the block & undo files currently use */
Expand Down
39 changes: 39 additions & 0 deletions src/test/blockmanager_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (c) 2022 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <chainparams.h>
#include <node/blockstorage.h>
#include <node/context.h>
#include <validation.h>

#include <boost/test/unit_test.hpp>
#include <test/util/setup_common.h>

// use BasicTestingSetup here for the data directory configuration, setup, and cleanup
BOOST_FIXTURE_TEST_SUITE(blockmanager_tests, BasicTestingSetup)

BOOST_AUTO_TEST_CASE(blockmanager_find_block_pos)
{
const auto params {CreateChainParams(ArgsManager{}, CBaseChainParams::MAIN)};
BlockManager blockman {};
CChain chain {};
// simulate adding a genesis block normally
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, nullptr).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
// simulate what happens during reindex
// simulate a well-formed genesis block being found at offset 8 in the blk00000.dat file
// the block is found at offset 8 because there is an 8 byte serialization header
// consisting of 4 magic bytes + 4 length bytes before each block in a well-formed blk file.
FlatFilePos pos{0, BLOCK_SERIALIZATION_HEADER_SIZE};
BOOST_CHECK_EQUAL(blockman.SaveBlockToDisk(params->GenesisBlock(), 0, chain, *params, &pos).nPos, BLOCK_SERIALIZATION_HEADER_SIZE);
// now simulate what happens after reindex for the first new block processed
// the actual block contents don't matter, just that it's a block.
// verify that the write position is at offset 0x12d.
// this is a check to make sure that https://github.com/bitcoin/bitcoin/issues/21379 does not recur
// 8 bytes (for serialization header) + 285 (for serialized genesis block) = 293
// add another 8 bytes for the second block's serialization header and we get 293 + 8 = 301
FlatFilePos actual{blockman.SaveBlockToDisk(params->GenesisBlock(), 1, chain, *params, nullptr)};
BOOST_CHECK_EQUAL(actual.nPos, BLOCK_SERIALIZATION_HEADER_SIZE + ::GetSerializeSize(params->GenesisBlock(), CLIENT_VERSION) + BLOCK_SERIALIZATION_HEADER_SIZE);
}

BOOST_AUTO_TEST_SUITE_END()
13 changes: 12 additions & 1 deletion src/validation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4705,7 +4705,18 @@ void CChainState::LoadExternalBlockFile(
}
}
} catch (const std::exception& e) {
LogPrintf("%s: Deserialize or I/O error - %s\n", __func__, e.what());
// historical bugs added extra data to the block files that does not deserialize cleanly.
// commonly this data is between readable blocks, but it does not really matter. such data is not fatal to the import process.
// the code that reads the block files deals with invalid data by simply ignoring it.
// it continues to search for the next {4 byte magic message start bytes + 4 byte length + block} that does deserialize cleanly
// and passes all of the other block validation checks dealing with POW and the merkle root, etc...
// we merely note with this informational log message when unexpected data is encountered.
// we could also be experiencing a storage system read error, or a read of a previous bad write. these are possible, but
// less likely scenarios. we don't have enough information to tell a difference here.
// the reindex process is not the place to attempt to clean and/or compact the block files. if so desired, a studious node operator
// may use knowledge of the fact that the block files are not entirely pristine in order to prepare a set of pristine, and
// perhaps ordered, block files for later reindexing.
LogPrint(BCLog::REINDEX, "%s: unexpected data at file offset 0x%x - %s. continuing\n", __func__, (nRewind - 1), e.what());
}
}
} catch (const std::runtime_error& e) {
Expand Down

0 comments on commit b04b71a

Please sign in to comment.