From 6b7dc5f840c78122e7e352bf65ce8a8310e28068 Mon Sep 17 00:00:00 2001 From: Marcos Pernambuco Motta <1091485+mpernambuco@users.noreply.github.com> Date: Thu, 4 Jul 2024 15:14:51 -0300 Subject: [PATCH 1/3] fix: Homebrew build --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 4e35a9e45..5374afd01 100644 --- a/src/Makefile +++ b/src/Makefile @@ -66,7 +66,7 @@ CXX=clang++ AR=libtool -static -o INCS= -BREW_PREFIX=$(shell which brew) +BREW_PREFIX=$(shell brew --prefix) PORT_PREFIX=$(shell which port) ifeq ($(MACOSX_DEPLOYMENT_TARGET),) From 0721f0a26b2a3b2822d56f298ef8ac3872fb1eab Mon Sep 17 00:00:00 2001 From: Marcos Pernambuco Motta <1091485+mpernambuco@users.noreply.github.com> Date: Fri, 5 Jul 2024 18:13:10 -0300 Subject: [PATCH 2/3] feat: increase tree leaf log2 size to 5 --- src/Makefile | 4 +- src/access-log.h | 35 +++-- src/cartesi/util.lua | 20 ++- src/clua-cartesi.cpp | 3 + src/clua-machine-util.cpp | 23 ++-- src/json-util.cpp | 16 ++- src/machine-c-defines.h | 2 +- src/machine-merkle-tree.h | 2 +- src/record-state-access.h | 89 ++++++------- src/replay-state-access.h | 191 +++++++++++++-------------- src/send-cmio-response.cpp | 6 +- src/uarch-record-state-access.h | 90 +++++++------ src/uarch-replay-state-access.h | 207 +++++++++++++++--------------- tests/lua/cartesi/tests/util.lua | 31 ++++- tests/lua/machine-bind.lua | 162 +++++++++++++++-------- tests/lua/mcycle-overflow.lua | 4 +- tests/misc/test-machine-c-api.cpp | 4 +- tests/misc/test-utils.h | 4 +- 18 files changed, 513 insertions(+), 380 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5374afd01..0c26e7c50 100644 --- a/src/Makefile +++ b/src/Makefile @@ -18,7 +18,7 @@ EMULATOR_MARCHID=17 # Every new emulator release should bump these constants EMULATOR_VERSION_MAJOR=0 -EMULATOR_VERSION_MINOR=17 +EMULATOR_VERSION_MINOR=18 EMULATOR_VERSION_PATCH=0 EMULATOR_VERSION_LABEL= @@ -66,7 +66,7 @@ CXX=clang++ AR=libtool -static -o INCS= -BREW_PREFIX=$(shell brew --prefix) +BREW_PREFIX=$(shell which brew) PORT_PREFIX=$(shell which port) ifeq ($(MACOSX_DEPLOYMENT_TARGET),) diff --git a/src/access-log.h b/src/access-log.h index 1fcf200eb..716abcff5 100644 --- a/src/access-log.h +++ b/src/access-log.h @@ -28,6 +28,7 @@ #include "bracket-note.h" #include "machine-merkle-tree.h" +#include "strict-aliasing.h" namespace cartesi { @@ -46,11 +47,14 @@ static inline void set_word_access_data(uint64_t w, access_data &ad) { ad.insert(ad.end(), p, p + sizeof(w)); } -static inline uint64_t get_word_access_data(const access_data &ad) { - assert(ad.size() == 8); - uint64_t w = 0; - memcpy(&w, ad.data(), sizeof(w)); - return w; +static inline void replace_word_access_data(uint64_t w, access_data &ad, int offset = 0) { + assert(ad.size() >= offset + sizeof(uint64_t)); + aliased_aligned_write(ad.data() + offset, w); +} + +static inline uint64_t get_word_access_data(const access_data &ad, int offset = 0) { + assert(ad.size() >= offset + sizeof(uint64_t)); + return aliased_aligned_read(ad.data() + offset); } /// \brief Records an access to the machine state @@ -165,18 +169,29 @@ class access { /// \param root_hash Hash to be used as the root of the proof. /// \return The corresponding proof proof_type make_proof(const hash_type root_hash) const { + // the access can be of data smaller than the merkle tree word size + // however, the proof must be at least as big as the merkle tree word size + const int proof_log2_size = std::max(m_log2_size, machine_merkle_tree::get_log2_word_size()); + // the proof address is the access address aligned to the merkle tree word size + const uint64_t proof_address = m_address & ~(machine_merkle_tree::get_word_size() - 1); if (!m_sibling_hashes.has_value()) { throw std::runtime_error("can't make proof if access doesn't have sibling hashes"); } // NOLINTNEXTLINE(bugprone-unchecked-optional-access) const auto &sibling_hashes = m_sibling_hashes.value(); - const int log2_root_size = m_log2_size + static_cast(sibling_hashes.size()); - proof_type proof(log2_root_size, m_log2_size); + const int log2_root_size = proof_log2_size + static_cast(sibling_hashes.size()); + if (m_read.has_value() && m_read.value().size() != (static_cast(1) << proof_log2_size)) { + throw std::runtime_error("access read data size is inconsistent with proof size"); + } + if (m_written.has_value() && m_written.value().size() != (static_cast(1) << proof_log2_size)) { + throw std::runtime_error("access written data size is inconsistent with proof size"); + } + proof_type proof(log2_root_size, proof_log2_size); proof.set_root_hash(root_hash); - proof.set_target_address(m_address); + proof.set_target_address(proof_address); proof.set_target_hash(m_read_hash); - for (int log2_size = m_log2_size; log2_size < log2_root_size; log2_size++) { - proof.set_sibling_hash(sibling_hashes[log2_size - m_log2_size], log2_size); + for (int log2_size = proof_log2_size; log2_size < log2_root_size; log2_size++) { + proof.set_sibling_hash(sibling_hashes[log2_size - proof_log2_size], log2_size); } return proof; } diff --git a/src/cartesi/util.lua b/src/cartesi/util.lua index 3fecf19b4..1af828f03 100644 --- a/src/cartesi/util.lua +++ b/src/cartesi/util.lua @@ -14,6 +14,8 @@ -- with this program (see COPYING). If not, see . -- +local cartesi = require("cartesi") + local _M = {} local function indentout(f, indent, fmt, ...) f:write(string.rep(" ", indent), string.format(fmt, ...)) end @@ -197,8 +199,16 @@ end local function hexhash8(hash) return string.sub(hexhash(hash), 1, 8) end -local function accessdatastring(data, data_hash, log2_size) - if log2_size == 3 then +local function accessdatastring(data, data_hash, data_log2_size, address) + local data_size = 1 << data_log2_size + if data_log2_size == 3 then + if data_size < #data then + -- access data is smaller than the tree leaf size + -- the logged data is the entire tree leaf, but we only need the data that was accessed + local leaf_aligned_addrss = address & ~((1 << cartesi.TREE_LOG2_WORD_SIZE) - 1) + local word_offset = address - leaf_aligned_addrss + data = data:sub(word_offset + 1, word_offset + data_size) + end data = string.unpack(" log2_root_size) { + luaL_error(L, "target size cannot be greater than root size"); + } const size_t sibling_hashes_count = log2_root_size - log2_target_size; if (sibling_hashes_count > 64) { luaL_error(L, "too many sibling hashes (expected max %d, got %d)", 64, static_cast(sibling_hashes_count)); @@ -433,19 +436,14 @@ static void check_cm_access(lua_State *L, int tabidx, bool proofs, cm_access *a, a->type = check_cm_access_type_field(L, tabidx, "type"); a->address = check_uint_field(L, tabidx, "address"); a->log2_size = check_int_field(L, tabidx, "log2_size"); - if (a->log2_size < CM_TREE_LOG2_WORD_SIZE || a->log2_size > CM_TREE_LOG2_ROOT_SIZE) { - luaL_error(L, "invalid log2_size (expected integer in {%d..%d})", CM_TREE_LOG2_WORD_SIZE, - CM_TREE_LOG2_ROOT_SIZE); - } - + const int expected_data_log2_size = std::max(a->log2_size, CM_TREE_LOG2_WORD_SIZE); if (opt_table_field(L, tabidx, "sibling_hashes")) { a->sibling_hashes = new cm_hash_array{}; - check_sibling_cm_hashes(L, -1, a->log2_size, CM_TREE_LOG2_ROOT_SIZE, a->sibling_hashes); + check_sibling_cm_hashes(L, -1, expected_data_log2_size, CM_TREE_LOG2_ROOT_SIZE, a->sibling_hashes); lua_pop(L, 1); } else if (proofs) { luaL_error(L, "missing sibling_hashes"); } - lua_getfield(L, tabidx, "read_hash"); clua_check_cm_hash(L, -1, &a->read_hash); lua_pop(L, 1); @@ -454,8 +452,8 @@ static void check_cm_access(lua_State *L, int tabidx, bool proofs, cm_access *a, clua_check_cm_hash(L, -1, &a->written_hash); lua_pop(L, 1); } - a->read_data = opt_cm_access_data_field(L, tabidx, "read", a->log2_size, &a->read_data_size); - a->written_data = opt_cm_access_data_field(L, tabidx, "written", a->log2_size, &a->written_data_size); + a->read_data = opt_cm_access_data_field(L, tabidx, "read", expected_data_log2_size, &a->read_data_size); + a->written_data = opt_cm_access_data_field(L, tabidx, "written", expected_data_log2_size, &a->written_data_size); } cm_access_log *clua_check_cm_access_log(lua_State *L, int tabidx, int ctxidx) { @@ -662,9 +660,10 @@ void clua_push_cm_access_log(lua_State *L, const cm_access_log *log) { } if (log->log_type.proofs && a->sibling_hashes != nullptr) { lua_newtable(L); - for (size_t log2_size = a->log2_size; log2_size < CM_TREE_LOG2_ROOT_SIZE; log2_size++) { - clua_push_cm_hash(L, &a->sibling_hashes->entry[log2_size - a->log2_size]); - lua_rawseti(L, -2, static_cast(log2_size - a->log2_size) + 1); + const int proof_log2_size = std::max(a->log2_size, CM_TREE_LOG2_WORD_SIZE); + for (size_t log2_size = proof_log2_size; log2_size < CM_TREE_LOG2_ROOT_SIZE; log2_size++) { + clua_push_cm_hash(L, &a->sibling_hashes->entry[log2_size - proof_log2_size]); + lua_rawseti(L, -2, static_cast(log2_size - proof_log2_size) + 1); } lua_setfield(L, -2, "sibling_hashes"); } diff --git a/src/json-util.cpp b/src/json-util.cpp index 59b7d702e..6d9540f8c 100644 --- a/src/json-util.cpp +++ b/src/json-util.cpp @@ -21,6 +21,7 @@ #include #include "base64.h" +#include "machine-merkle-tree.h" namespace cartesi { @@ -649,6 +650,9 @@ void ju_get_opt_field(const nlohmann::json &j, const K &key, access &access, con throw std::domain_error("field \""s + new_path + "log2_size\" is out of bounds"); } access.set_log2_size(static_cast(log2_size)); + // Minimum logged data size is merkle tree word size + const uint64_t data_log2_size = + std::max(log2_size, static_cast(machine_merkle_tree::get_log2_word_size())); uint64_t address = 0; ju_get_field(jk, "address"s, address, new_path); access.set_address(address); @@ -665,7 +669,7 @@ void ju_get_opt_field(const nlohmann::json &j, const K &key, access &access, con std::optional read; ju_get_opt_field(jk, "read"s, read, new_path); if (read.has_value()) { - if (read.value().size() != (UINT64_C(1) << log2_size)) { + if (read.value().size() != (UINT64_C(1) << data_log2_size)) { throw std::invalid_argument("field \""s + new_path + "written\" has wrong length"); } access.set_read(std::move(read.value())); @@ -674,20 +678,18 @@ void ju_get_opt_field(const nlohmann::json &j, const K &key, access &access, con std::optional written; ju_get_opt_field(jk, "written"s, written, new_path); if (written.has_value()) { - if (written.value().size() != (UINT64_C(1) << log2_size)) { + if (written.value().size() != (UINT64_C(1) << data_log2_size)) { throw std::invalid_argument("field \""s + new_path + "written\" has wrong length"); } access.set_written(std::move(written.value())); } } - not_default_constructible proof; - ju_get_opt_field(jk, "proof"s, proof, new_path); if (contains(jk, "sibling_hashes")) { access.get_sibling_hashes().emplace(); // NOLINTNEXTLINE(bugprone-unchecked-optional-access) auto &sibling_hashes = access.get_sibling_hashes().value(); ju_get_vector_like_field(jk, "sibling_hashes"s, sibling_hashes, new_path); - auto expected_depth = static_cast(machine_merkle_tree::get_log2_root_size() - access.get_log2_size()); + auto expected_depth = static_cast(machine_merkle_tree::get_log2_root_size() - data_log2_size); if (sibling_hashes.size() != expected_depth) { throw std::invalid_argument("field \""s + new_path + "sibling_hashes\" has wrong length"); } @@ -1208,7 +1210,9 @@ void to_json(nlohmann::json &j, const access &a) { if (a.get_sibling_hashes().has_value()) { // NOLINTNEXTLINE(bugprone-unchecked-optional-access) const auto &sibling_hashes = a.get_sibling_hashes().value(); - auto depth = machine_merkle_tree::get_log2_root_size() - a.get_log2_size(); + // Minimum logged data size is merkle tree word size + auto data_log2_size = std::max(a.get_log2_size(), machine_merkle_tree::get_log2_word_size()); + auto depth = machine_merkle_tree::get_log2_root_size() - data_log2_size; nlohmann::json s = nlohmann::json::array(); for (int i = 0; i < depth; i++) { s.push_back(encode_base64(sibling_hashes[i])); diff --git a/src/machine-c-defines.h b/src/machine-c-defines.h index b899e1b36..f51b6b286 100644 --- a/src/machine-c-defines.h +++ b/src/machine-c-defines.h @@ -27,7 +27,7 @@ #define CM_MACHINE_F_REG_COUNT 32 // NOLINT(cppcoreguidelines-macro-usage, modernize-macro-to-enum) #define CM_MACHINE_UARCH_X_REG_COUNT 32 // NOLINT(cppcoreguidelines-macro-usage, modernize-macro-to-enum) -#define CM_TREE_LOG2_WORD_SIZE 3 // NOLINT(cppcoreguidelines-macro-usage, modernize-macro-to-enum) +#define CM_TREE_LOG2_WORD_SIZE 5 // NOLINT(cppcoreguidelines-macro-usage, modernize-macro-to-enum) #define CM_TREE_LOG2_PAGE_SIZE 12 // NOLINT(cppcoreguidelines-macro-usage, modernize-macro-to-enum) #define CM_TREE_LOG2_ROOT_SIZE 64 // NOLINT(cppcoreguidelines-macro-usage, modernize-macro-to-enum) #define CM_FLASH_DRIVE_CONFIGS_MAX_SIZE 8 // NOLINT(cppcoreguidelines-macro-usage, modernize-macro-to-enum) diff --git a/src/machine-merkle-tree.h b/src/machine-merkle-tree.h index ba6042019..8f26c158e 100644 --- a/src/machine-merkle-tree.h +++ b/src/machine-merkle-tree.h @@ -71,7 +71,7 @@ class machine_merkle_tree final { /// \brief LOG2_WORD_SIZE Number of bits covered by a word. /// I.e., log2 of number of bytes subintended by the /// the deepest tree nodes. - static constexpr int LOG2_WORD_SIZE = 3; + static constexpr int LOG2_WORD_SIZE = 5; /// \brief DEPTH Depth of Merkle tree. static constexpr int DEPTH = LOG2_ROOT_SIZE - LOG2_WORD_SIZE; diff --git a/src/record-state-access.h b/src/record-state-access.h index 192b24354..b57494340 100644 --- a/src/record-state-access.h +++ b/src/record-state-access.h @@ -44,7 +44,7 @@ class record_state_access : public i_state_access::value, - "Machine and machine_merkle_tree word sizes must match"); - assert((paligned & (sizeof(uint64_t) - 1)) == 0); + void log_read(uint64_t paligned, const char *text) const { + static_assert(machine_merkle_tree::get_log2_word_size() >= log2_size::value, + "Merkle tree word size must be at least as large as a machine word"); + if ((paligned & (sizeof(uint64_t) - 1)) != 0) { + throw std::invalid_argument{"paligned is not aligned to word size"}; + } + const uint64_t pleaf_aligned = paligned & ~(machine_merkle_tree::get_word_size() - 1); access a; if (m_log->get_log_type().has_proofs()) { // We can skip updating the merkle tree while getting the proof because we assume that: // 1) A full merkle tree update was called at the beginning of machine::log_load_cmio_input() // 2) We called update_merkle_tree_page on all write accesses const auto proof = - m_m.get_proof(paligned, machine_merkle_tree::get_log2_word_size(), skip_merkle_tree_update); - + m_m.get_proof(pleaf_aligned, machine_merkle_tree::get_log2_word_size(), skip_merkle_tree_update); // We just store the sibling hashes in the access because this is the only missing piece of data needed to // reconstruct the proof a.set_sibling_hashes(proof.get_sibling_hashes()); } a.set_type(access_type::read); a.set_address(paligned); - a.set_log2_size(machine_merkle_tree::get_log2_word_size()); + a.set_log2_size(log2_size::value); + // NOLINTBEGIN(bugprone-unchecked-optional-access) + // we log the leaf data at pleaf_aligned that contains the word at paligned a.get_read().emplace(); - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - set_word_access_data(val, a.get_read().value()); - - hash_type read_hash; - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - get_hash(a.get_read().value(), read_hash); - a.set_read_hash(read_hash); - + a.get_read().value().resize(machine_merkle_tree::get_word_size()); + // read the entire leaf where the word is located + m_m.read_memory(pleaf_aligned, a.get_read().value().data(), machine_merkle_tree::get_word_size()); + get_hash(a.get_read().value(), a.get_read_hash()); + // NOLINTEND(bugprone-unchecked-optional-access) m_log->push_access(std::move(a), text); - return val; } /// \brief Logs a write access before it happens. /// \param paligned Physical address of the word in the machine state (Must be aligned to a 64-bit word). - /// \param dest Value before writing. /// \param val Value to write. /// \param text Textual description of the access. - void log_before_write(uint64_t paligned, uint64_t dest, uint64_t val, const char *text) { - static_assert(machine_merkle_tree::get_log2_word_size() == log2_size::value, - "Machine and machine_merkle_tree word sizes must match"); - assert((paligned & (sizeof(uint64_t) - 1)) == 0); + void log_before_write(uint64_t paligned, uint64_t val, const char *text) { + static_assert(machine_merkle_tree::get_log2_word_size() >= log2_size::value, + "Merkle tree word size must be at least as large as a machine word"); + if ((paligned & (sizeof(uint64_t) - 1)) != 0) { + throw std::invalid_argument{"paligned is not aligned to word size"}; + } + // address of the leaf that contains the word at paligned + const uint64_t pleaf_aligned = paligned & ~(machine_merkle_tree::get_word_size() - 1); access a; if (m_log->get_log_type().has_proofs()) { // We can skip updating the merkle tree while getting the proof because we assume that: // 1) A full merkle tree update was called at the beginning of machine::log_load_cmio_input() // 2) We called update_merkle_tree_page on all write accesses const auto proof = - m_m.get_proof(paligned, machine_merkle_tree::get_log2_word_size(), skip_merkle_tree_update); + m_m.get_proof(pleaf_aligned, machine_merkle_tree::get_log2_word_size(), skip_merkle_tree_update); // We just store the sibling hashes in the access because this is the only missing piece of data needed to // reconstruct the proof a.set_sibling_hashes(proof.get_sibling_hashes()); } a.set_type(access_type::write); a.set_address(paligned); - a.set_log2_size(machine_merkle_tree::get_log2_word_size()); - + a.set_log2_size(log2_size::value); // NOLINTBEGIN(bugprone-unchecked-optional-access) + // we log the entire leaf where the word is located a.get_read().emplace(); - set_word_access_data(dest, a.get_read().value()); + a.get_read().value().resize(machine_merkle_tree::get_word_size()); + m_m.read_memory(pleaf_aligned, a.get_read().value().data(), machine_merkle_tree::get_word_size()); get_hash(a.get_read().value(), a.get_read_hash()); - - a.get_written().emplace(); - set_word_access_data(val, a.get_written().value()); + // the logged written data is the same as the read data, but with the word at paligned replaced by word + a.set_written(access_data(a.get_read().value())); // copy the read data + const int word_offset = static_cast(paligned - pleaf_aligned); // offset of word in leaf + replace_word_access_data(val, a.get_written().value(), word_offset); // replace the word + // compute the hash of the written data a.get_written_hash().emplace(); get_hash(a.get_written().value(), a.get_written_hash().value()); // NOLINTEND(bugprone-unchecked-optional-access) - m_log->push_access(std::move(a), text); } @@ -171,12 +175,12 @@ class record_state_access : public i_state_accessget_log_type().has_large_data()) { access_data &data = a.get_written().emplace(write_length); memcpy(data.data(), pma.get_memory().get_host_memory(), write_length); diff --git a/src/replay-state-access.h b/src/replay-state-access.h index 608152e98..8142e47df 100644 --- a/src/replay-state-access.h +++ b/src/replay-state-access.h @@ -24,10 +24,12 @@ #include #include +#include "access-log.h" #include "i-state-access.h" #include "machine-merkle-tree.h" #include "machine.h" #include "shadow-state.h" +#include "uarch-solidity-compat.h" #include "unique-c-ptr.h" namespace cartesi { @@ -107,16 +109,7 @@ class replay_state_access : public i_state_access= log2_size::value, + "Merkle tree word size must be at least as large as a machine word"); + if (paligned & (sizeof(uint64_t) - 1)) { + throw std::invalid_argument{"address not aligned to word size"}; + } if (m_next_access >= m_accesses.size()) { throw std::invalid_argument{"too few accesses in log"}; } const auto &access = m_accesses[m_next_access]; - if ((paligned & ((UINT64_C(1) << log2_size) - 1)) != 0) { - throw std::invalid_argument{"access address not aligned to size"}; + if (access.get_type() != access_type::read) { + throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to read " + text}; } if (access.get_address() != paligned) { std::ostringstream err; - err << "expected access " << access_to_report() << " to read " << text << " at address 0x" << std::hex + err << "expected access " << access_to_report() << " to read " << text << " address 0x" << std::hex << paligned << "(" << std::dec << paligned << ")"; throw std::invalid_argument{err.str()}; } - if (log2_size < 3 || log2_size > 63) { - throw std::invalid_argument{"invalid access size"}; - } - if (access.get_log2_size() != log2_size) { + if (access.get_log2_size() != log2_size::value) { throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to read 2^" + - std::to_string(log2_size) + " bytes from " + text}; - } - if (access.get_type() != access_type::read) { - throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to read " + text}; + std::to_string(machine_merkle_tree::get_log2_word_size()) + " bytes from " + text}; } if (!access.get_read().has_value()) { throw std::invalid_argument{ "missing read " + std::string(text) + " data at access " + std::to_string(access_to_report())}; } - const auto &value_read = access.get_read().value(); // NOLINT(bugprone-unchecked-optional-access) - if (value_read.size() != UINT64_C(1) << log2_size) { + // NOLINTBEGIN(bugprone-unchecked-optional-access) + const auto &read_data = access.get_read().value(); + if (read_data.size() != machine_merkle_tree::get_word_size()) { throw std::invalid_argument{"expected read " + std::string(text) + " data to contain 2^" + - std::to_string(log2_size) + " bytes at access " + std::to_string(access_to_report())}; + std::to_string(machine_merkle_tree::get_log2_word_size()) + " bytes at access " + + std::to_string(access_to_report())}; } // check if logged read data hashes to the logged read hash - hash_type computed_hash{}; - get_hash(m_hasher, value_read, computed_hash); - if (access.get_read_hash() != computed_hash) { + hash_type computed_read_hash{}; + get_hash(m_hasher, read_data, computed_read_hash); + if (access.get_read_hash() != computed_read_hash) { throw std::invalid_argument{"logged read data of " + std::string(text) + " data does not hash to the logged read hash at access " + std::to_string(access_to_report())}; } @@ -171,9 +165,11 @@ class replay_state_access : public i_state_access(paligned - pleaf_aligned); + return get_word_access_data(read_data, word_offset); } /// \brief Checks a logged word write and advances log. @@ -181,82 +177,87 @@ class replay_state_access : public i_state_access= log2_size::value, + "Merkle tree word size must be at least as large as a machine word"); + if (paligned & (sizeof(uint64_t) - 1)) { + throw std::invalid_argument{"paligned not aligned to word size"}; + } if (m_next_access >= m_accesses.size()) { throw std::invalid_argument{"too few accesses in log"}; } const auto &access = m_accesses[m_next_access]; - if ((paligned & ((UINT64_C(1) << log2_size) - 1)) != 0) { - throw std::invalid_argument{"access address not aligned to size"}; + if (access.get_type() != access_type::write) { + throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to write " + text}; } if (access.get_address() != paligned) { std::ostringstream err; - err << "expected access " << access_to_report() << " to write " << text << " at address 0x" << std::hex + err << "expected access " << access_to_report() << " to write " << text << " to address 0x" << std::hex << paligned << "(" << std::dec << paligned << ")"; throw std::invalid_argument{err.str()}; } - if (log2_size < 3 || log2_size > 63) { - throw std::invalid_argument{"invalid access size"}; - } - if (access.get_log2_size() != log2_size) { + if (access.get_log2_size() != log2_size::value) { throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to write 2^" + - std::to_string(log2_size) + " bytes from " + text}; + std::to_string(machine_merkle_tree::get_log2_word_size()) + " bytes to " + text}; } - if (access.get_type() != access_type::write) { - throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to write " + text}; + // NOLINTBEGIN(bugprone-unchecked-optional-access) + // check read + if (!access.get_read().has_value()) { + throw std::invalid_argument{ + "missing read " + std::string(text) + " data at access " + std::to_string(access_to_report())}; } - if (access.get_read().has_value()) { - const auto &value_read = access.get_read().value(); // NOLINT(bugprone-unchecked-optional-access) - if (value_read.size() != UINT64_C(1) << log2_size) { - throw std::invalid_argument{"expected overwritten data from " + std::string(text) + " to contain 2^" + - std::to_string(log2_size) + " bytes at access " + std::to_string(access_to_report())}; - } - // check if read data hashes to the logged read hash - hash_type computed_hash{}; - get_hash(m_hasher, value_read, computed_hash); - if (access.get_read_hash() != computed_hash) { - throw std::invalid_argument{"logged read data of " + std::string(text) + - " does not hash to the logged read hash at access " + std::to_string(access_to_report())}; - } + const auto &read_data = access.get_read().value(); + if (read_data.size() != machine_merkle_tree::get_word_size()) { + throw std::invalid_argument{"expected overwritten data from " + std::string(text) + " to contain 2^" + + std::to_string(access.get_log2_size()) + " bytes at access " + std::to_string(access_to_report())}; + } + // check if read data hashes to the logged read hash + hash_type computed_read_hash{}; + get_hash(m_hasher, read_data, computed_read_hash); + if (access.get_read_hash() != computed_read_hash) { + throw std::invalid_argument{"logged read data of " + std::string(text) + + " does not hash to the logged read hash at access " + std::to_string(access_to_report())}; } + // check write if (!access.get_written_hash().has_value()) { throw std::invalid_argument{ "missing written " + std::string(text) + " hash at access " + std::to_string(access_to_report())}; } - const auto &written_hash = access.get_written_hash().value(); // NOLINT(bugprone-unchecked-optional-access) - // check if value being written hashes to the logged written hash - hash_type computed_hash{}; - get_hash(m_hasher, val, computed_hash); - if (written_hash != computed_hash) { - throw std::invalid_argument{"value being written to " + std::string(text) + + const auto &written_hash = access.get_written_hash().value(); + if (!access.get_written().has_value()) { + throw std::invalid_argument{ + "missing written " + std::string(text) + " data at access " + std::to_string(access_to_report())}; + } + const auto &written_data = access.get_written().value(); + if (written_data.size() != read_data.size()) { + throw std::invalid_argument{"expected written " + std::string(text) + " data to contain 2^" + + std::to_string(access.get_log2_size()) + " bytes at access " + std::to_string(access_to_report())}; + } + // check if written data hashes to the logged written hash + hash_type computed_written_hash{}; + get_hash(m_hasher, written_data, computed_written_hash); + if (written_hash != computed_written_hash) { + throw std::invalid_argument{"logged written data of " + std::string(text) + " does not hash to the logged written hash at access " + std::to_string(access_to_report())}; } - if (access.get_written().has_value()) { - const auto &value_written = access.get_written().value(); // NOLINT(bugprone-unchecked-optional-access) - if (value_written.size() != UINT64_C(1) << log2_size) { - throw std::invalid_argument{"expected written " + std::string(text) + " data to contain 2^" + - std::to_string(log2_size) + " bytes at access " + std::to_string(access_to_report())}; - } - // check if written data hashes to the logged written hash - get_hash(m_hasher, value_written, computed_hash); - if (written_hash != computed_hash) { - throw std::invalid_argument{"logged written data of " + std::string(text) + - " does not hash to the logged written hash at access " + std::to_string(access_to_report())}; - } - } + // check if word being written matches the logged data + const uint64_t pleaf_aligned = paligned & ~(machine_merkle_tree::get_word_size() - 1); + const int word_offset = static_cast(paligned - pleaf_aligned); + const uint64_t logged_word = get_word_access_data(written_data, word_offset); + if (word != logged_word) { + throw std::invalid_argument{"value being written to " + std::string(text) + + " does not match the logged written value at access " + std::to_string(access_to_report())}; + } + // check if logged written data differs from the logged read data only by the written word + access_data expected_written_data(read_data); // make a copy of read data + replace_word_access_data(word, expected_written_data, word_offset); // patch with written word + if (written_data != expected_written_data) { + throw std::invalid_argument{"logged written data of " + std::string(text) + + " doesn't differ from the logged read data only by the written word at access " + + std::to_string(access_to_report())}; + } + // NOLINTEND(bugprone-unchecked-optional-access) + // check proof if (m_verify_proofs) { auto proof = access.make_proof(m_root_hash); if (!proof.verify(m_hasher)) { @@ -271,18 +272,18 @@ class replay_state_access : public i_state_access(write_length, std::nothrow_t{}); @@ -335,7 +336,8 @@ class replay_state_access : public i_state_access data_length) { memset(scratch.get() + data_length, 0, write_length - data_length); } - get_merkle_tree_hash(hasher, scratch.get(), write_length, sizeof(uint64_t), computed_data_hash); + get_merkle_tree_hash(hasher, scratch.get(), write_length, machine_merkle_tree::get_word_size(), + computed_data_hash); // check if logged written hash matches the computed data hash if (written_hash != computed_data_hash) { throw std::invalid_argument{"logged written hash of " + text + @@ -344,7 +346,7 @@ class replay_state_access : public i_state_access 0) { - // Find the write length: the smallest power of 2 that is >= length and >= word size + // Find the write length: the smallest power of 2 that is >= dataLength and >= tree leaf size uint32 writeLengthLog2Size = uint32Log2(dataLength); - if (writeLengthLog2Size < 3) { - writeLengthLog2Size = 3; // minimum write size is a word + if (writeLengthLog2Size < machine_merkle_tree::get_log2_word_size()) { + writeLengthLog2Size = 5; // minimum write size is the tree leaf size } if (uint32ShiftLeft(1, writeLengthLog2Size) < dataLength) { writeLengthLog2Size += 1; diff --git a/src/uarch-record-state-access.h b/src/uarch-record-state-access.h index 4cb5fd06d..f59316679 100644 --- a/src/uarch-record-state-access.h +++ b/src/uarch-record-state-access.h @@ -19,18 +19,19 @@ /// \file /// \brief State access implementation that record and logs all accesses - #include #include "i-uarch-state-access.h" #include "machine-merkle-tree.h" + #include "machine.h" #include "uarch-bridge.h" #include "uarch-constants.h" #include "uarch-machine.h" + #include "uarch-pristine-state-hash.h" +#include "uarch-solidity-compat.h" #include "unique-c-ptr.h" - namespace cartesi { /// \details The uarch_record_state_access logs all access to the machine state. @@ -73,7 +74,7 @@ class uarch_record_state_access : public i_uarch_state_accesspush_bracket(bracket_type::end, m_text.c_str()); @@ -154,77 +155,88 @@ class uarch_record_state_access : public i_uarch_state_access::value, - "Machine and machine_merkle_tree word sizes must match"); - assert((paligned & (sizeof(uint64_t) - 1)) == 0); + static_assert(machine_merkle_tree::get_log2_word_size() >= log2_size::value, + "Merkle tree word size must be at least as large as a machine word"); + if ((paligned & (sizeof(uint64_t) - 1)) != 0) { + throw std::invalid_argument{"paligned is not aligned to word size"}; + } + const uint64_t pleaf_aligned = paligned & ~(machine_merkle_tree::get_word_size() - 1); access a; if (m_log->get_log_type().has_proofs()) { // We can skip updating the merkle tree while getting the proof because we assume that: // 1) A full merkle tree update was called at the beginning of machine::log_uarch_step() // 2) We called update_merkle_tree_page on all write accesses const auto proof = - m_m.get_proof(paligned, machine_merkle_tree::get_log2_word_size(), skip_merkle_tree_update); - + m_m.get_proof(pleaf_aligned, machine_merkle_tree::get_log2_word_size(), skip_merkle_tree_update); // We just store the sibling hashes in the access because this is the only missing piece of data needed to // reconstruct the proof a.set_sibling_hashes(proof.get_sibling_hashes()); } a.set_type(access_type::read); a.set_address(paligned); - a.set_log2_size(machine_merkle_tree::get_log2_word_size()); + a.set_log2_size(log2_size::value); + // NOLINTBEGIN(bugprone-unchecked-optional-access) + // we log the leaf data at pleaf_aligned that contains the word at paligned a.get_read().emplace(); - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - set_word_access_data(val, a.get_read().value()); - - hash_type read_hash; - // NOLINTNEXTLINE(bugprone-unchecked-optional-access) - get_hash(a.get_read().value(), read_hash); - a.set_read_hash(read_hash); - + a.get_read().value().resize(machine_merkle_tree::get_word_size()); + // read the entire leaf where the word is located + m_m.read_memory(pleaf_aligned, a.get_read().value().data(), machine_merkle_tree::get_word_size()); + get_hash(a.get_read().value(), a.get_read_hash()); + // ensure that the read data is the same as the value read + const int word_offset = static_cast(paligned - pleaf_aligned); // offset of word in leaf + const uint64_t logged_val = get_word_access_data(a.get_read().value(), word_offset); + if (val != logged_val) { + throw std::runtime_error("read value does not match logged value"); + } + // NOLINTEND(bugprone-unchecked-optional-access) m_log->push_access(std::move(a), text); return val; } - /// \brief Logs a write access before it happens. + /// \brief Logs a write access to a uint64_t word before it happens. /// \param paligned Physical address of the word in the machine state (Must be aligned to a 64-bit word). - /// \param dest Value before writing. /// \param val Value to write. /// \param text Textual description of the access. - void log_before_write(uint64_t paligned, uint64_t dest, uint64_t val, const char *text) { - static_assert(machine_merkle_tree::get_log2_word_size() == log2_size::value, - "Machine and machine_merkle_tree word sizes must match"); - assert((paligned & (sizeof(uint64_t) - 1)) == 0); + void log_before_write(uint64_t paligned, uint64_t word, const char *text) { + static_assert(machine_merkle_tree::get_log2_word_size() >= log2_size::value, + "Merkle tree word size must be at least as large as a machine word"); + if ((paligned & (sizeof(uint64_t) - 1)) != 0) { + throw std::invalid_argument{"paligned is not aligned to word size"}; + } + const uint64_t pleaf_aligned = paligned & ~(machine_merkle_tree::get_word_size() - 1); access a; if (m_log->get_log_type().has_proofs()) { // We can skip updating the merkle tree while getting the proof because we assume that: // 1) A full merkle tree update was called at the beginning of machine::log_uarch_step() // 2) We called update_merkle_tree_page on all write accesses const auto proof = - m_m.get_proof(paligned, machine_merkle_tree::get_log2_word_size(), skip_merkle_tree_update); + m_m.get_proof(pleaf_aligned, machine_merkle_tree::get_log2_word_size(), skip_merkle_tree_update); // We just store the sibling hashes in the access because this is the only missing piece of data needed to // reconstruct the proof a.set_sibling_hashes(proof.get_sibling_hashes()); } a.set_type(access_type::write); a.set_address(paligned); - a.set_log2_size(machine_merkle_tree::get_log2_word_size()); - + a.set_log2_size(log2_size::value); // NOLINTBEGIN(bugprone-unchecked-optional-access) + // we log the leaf data at pleaf_aligned that contains the word at paligned a.get_read().emplace(); - set_word_access_data(dest, a.get_read().value()); + a.get_read().value().resize(machine_merkle_tree::get_word_size()); + m_m.read_memory(pleaf_aligned, a.get_read().value().data(), machine_merkle_tree::get_word_size()); get_hash(a.get_read().value(), a.get_read_hash()); - - a.get_written().emplace(); - set_word_access_data(val, a.get_written().value()); + // the logged written data is the same as the read data, but with the word at paligned replaced by word + a.set_written(access_data(a.get_read().value())); // copy the read data + const int word_offset = static_cast(paligned - pleaf_aligned); // offset of word in leaf + replace_word_access_data(word, a.get_written().value(), word_offset); // replace the word + // compute the hash of the written data a.get_written_hash().emplace(); get_hash(a.get_written().value(), a.get_written_hash().value()); // NOLINTEND(bugprone-unchecked-optional-access) - m_log->push_access(std::move(a), text); } @@ -244,10 +256,10 @@ class uarch_record_state_access : public i_uarch_state_access(hdata); // Log the write access - log_before_write(paddr, old_data, data, "memory"); + log_before_write(paddr, data, "memory"); // Actually modify the state aliased_aligned_write(hdata, data); @@ -380,9 +391,8 @@ class uarch_record_state_access : public i_uarch_state_access #include "i-uarch-state-access.h" +#include "machine-merkle-tree.h" +#include "machine.h" #include "shadow-state.h" #include "uarch-bridge.h" @@ -102,7 +104,7 @@ class uarch_replay_state_access : public i_uarch_state_access= log2_size::value, + "Merkle tree word size must be at least as large as a machine word"); + if (paligned & (sizeof(uint64_t) - 1)) { + throw std::invalid_argument{"address not aligned to word size"}; + } if (m_next_access >= m_accesses.size()) { throw std::invalid_argument{"too few accesses in log"}; } const auto &access = m_accesses[m_next_access]; - if ((paligned & ((UINT64_C(1) << log2_size) - 1)) != 0) { - throw std::invalid_argument{"access address not aligned to size"}; + if (access.get_type() != access_type::read) { + throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to read " + text}; } if (access.get_address() != paligned) { std::ostringstream err; - err << "expected access " << access_to_report() << " to read " << text << " at address 0x" << std::hex + err << "expected access " << access_to_report() << " to read " << text << " address 0x" << std::hex << paligned << "(" << std::dec << paligned << ")"; throw std::invalid_argument{err.str()}; } - if (log2_size < 3 || log2_size > 63) { - throw std::invalid_argument{"invalid access size"}; - } - if (access.get_log2_size() != log2_size) { + if (access.get_log2_size() != log2_size::value) { throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to read 2^" + - std::to_string(log2_size) + " bytes from " + text}; - } - if (access.get_type() != access_type::read) { - throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to read " + text}; + std::to_string(machine_merkle_tree::get_log2_word_size()) + " bytes from " + text}; } if (!access.get_read().has_value()) { throw std::invalid_argument{ "missing read " + std::string(text) + " data at access " + std::to_string(access_to_report())}; } - const auto &value_read = access.get_read().value(); // NOLINT(bugprone-unchecked-optional-access) - if (value_read.size() != UINT64_C(1) << log2_size) { + // NOLINTBEGIN(bugprone-unchecked-optional-access) + const auto &read_data = access.get_read().value(); + if (read_data.size() != machine_merkle_tree::get_word_size()) { throw std::invalid_argument{"expected read " + std::string(text) + " data to contain 2^" + - std::to_string(log2_size) + " bytes at access " + std::to_string(access_to_report())}; + std::to_string(machine_merkle_tree::get_log2_word_size()) + " bytes at access " + + std::to_string(access_to_report())}; } // check if logged read data hashes to the logged read hash - hash_type computed_hash{}; - get_hash(m_hasher, value_read, computed_hash); - if (access.get_read_hash() != computed_hash) { + hash_type computed_read_hash{}; + get_hash(m_hasher, read_data, computed_read_hash); + if (access.get_read_hash() != computed_read_hash) { throw std::invalid_argument{"logged read data of " + std::string(text) + " data does not hash to the logged read hash at access " + std::to_string(access_to_report())}; } @@ -167,8 +160,10 @@ class uarch_replay_state_access : public i_uarch_state_access(paligned - pleaf_aligned); + return get_word_access_data(read_data, word_offset); + // NOLINTEND(bugprone-unchecked-optional-access) } /// \brief Checks a logged word write and advances log. @@ -176,82 +171,87 @@ class uarch_replay_state_access : public i_uarch_state_access= log2_size::value, + "Merkle tree word size must be at least as large as a machine word"); + if (paligned & (sizeof(uint64_t) - 1)) { + throw std::invalid_argument{"paligned not aligned to word size"}; + } if (m_next_access >= m_accesses.size()) { throw std::invalid_argument{"too few accesses in log"}; } const auto &access = m_accesses[m_next_access]; - if ((paligned & ((UINT64_C(1) << log2_size) - 1)) != 0) { - throw std::invalid_argument{"access address not aligned to size"}; + if (access.get_type() != access_type::write) { + throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to write " + text}; } if (access.get_address() != paligned) { std::ostringstream err; - err << "expected access " << access_to_report() << " to write " << text << " at address 0x" << std::hex + err << "expected access " << access_to_report() << " to write " << text << " to address 0x" << std::hex << paligned << "(" << std::dec << paligned << ")"; throw std::invalid_argument{err.str()}; } - if (log2_size < 3 || log2_size > 63) { - throw std::invalid_argument{"invalid access size"}; - } - if (access.get_log2_size() != log2_size) { + if (access.get_log2_size() != log2_size::value) { throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to write 2^" + - std::to_string(log2_size) + " bytes from " + text}; + std::to_string(machine_merkle_tree::get_log2_word_size()) + " bytes to " + text}; } - if (access.get_type() != access_type::write) { - throw std::invalid_argument{"expected access " + std::to_string(access_to_report()) + " to write " + text}; + // NOLINTBEGIN(bugprone-unchecked-optional-access) + // check read + if (!access.get_read().has_value()) { + throw std::invalid_argument{ + "missing read " + std::string(text) + " data at access " + std::to_string(access_to_report())}; } - if (access.get_read().has_value()) { - const auto &value_read = access.get_read().value(); // NOLINT(bugprone-unchecked-optional-access) - if (value_read.size() != UINT64_C(1) << log2_size) { - throw std::invalid_argument{"expected overwritten data from " + std::string(text) + " to contain 2^" + - std::to_string(log2_size) + " bytes at access " + std::to_string(access_to_report())}; - } - // check if read data hashes to the logged read hash - hash_type computed_hash{}; - get_hash(m_hasher, value_read, computed_hash); - if (access.get_read_hash() != computed_hash) { - throw std::invalid_argument{"logged read data of " + std::string(text) + - " does not hash to the logged read hash at access " + std::to_string(access_to_report())}; - } + const auto &read_data = access.get_read().value(); + if (read_data.size() != machine_merkle_tree::get_word_size()) { + throw std::invalid_argument{"expected overwritten data from " + std::string(text) + " to contain 2^" + + std::to_string(access.get_log2_size()) + " bytes at access " + std::to_string(access_to_report())}; } + // check if read data hashes to the logged read hash + hash_type computed_read_hash{}; + get_hash(m_hasher, read_data, computed_read_hash); + if (access.get_read_hash() != computed_read_hash) { + throw std::invalid_argument{"logged read data of " + std::string(text) + + " does not hash to the logged read hash at access " + std::to_string(access_to_report())}; + } + // check write if (!access.get_written_hash().has_value()) { throw std::invalid_argument{ "missing written " + std::string(text) + " hash at access " + std::to_string(access_to_report())}; } - const auto &written_hash = access.get_written_hash().value(); // NOLINT(bugprone-unchecked-optional-access) - // check if value being written hashes to the logged written hash - hash_type computed_hash{}; - get_hash(m_hasher, val, computed_hash); - if (written_hash != computed_hash) { - throw std::invalid_argument{"value being written to " + std::string(text) + + const auto &written_hash = access.get_written_hash().value(); + if (!access.get_written().has_value()) { + throw std::invalid_argument{ + "missing written " + std::string(text) + " data at access " + std::to_string(access_to_report())}; + } + const auto &written_data = access.get_written().value(); + if (written_data.size() != read_data.size()) { + throw std::invalid_argument{"expected written " + std::string(text) + " data to contain 2^" + + std::to_string(access.get_log2_size()) + " bytes at access " + std::to_string(access_to_report())}; + } + // check if written data hashes to the logged written hash + hash_type computed_written_hash{}; + get_hash(m_hasher, written_data, computed_written_hash); + if (written_hash != computed_written_hash) { + throw std::invalid_argument{"logged written data of " + std::string(text) + " does not hash to the logged written hash at access " + std::to_string(access_to_report())}; } - if (access.get_written().has_value()) { - const auto &value_written = access.get_written().value(); // NOLINT(bugprone-unchecked-optional-access) - if (value_written.size() != UINT64_C(1) << log2_size) { - throw std::invalid_argument{"expected written " + std::string(text) + " data to contain 2^" + - std::to_string(log2_size) + " bytes at access " + std::to_string(access_to_report())}; - } - // check if written data hashes to the logged written hash - get_hash(m_hasher, value_written, computed_hash); - if (written_hash != computed_hash) { - throw std::invalid_argument{"logged written data of " + std::string(text) + - " does not hash to the logged written hash at access " + std::to_string(access_to_report())}; - } - } + // check if word being written matches the logged data + const uint64_t pleaf_aligned = paligned & ~(machine_merkle_tree::get_word_size() - 1); + const int word_offset = static_cast(paligned - pleaf_aligned); + const uint64_t logged_word = get_word_access_data(written_data, word_offset); + if (word != logged_word) { + throw std::invalid_argument{"value being written to " + std::string(text) + + " does not match the logged written value at access " + std::to_string(access_to_report())}; + } + // check if logged written data differs from the logged read data only by the written word + access_data expected_written_data(read_data); // make a copy of read data + replace_word_access_data(word, expected_written_data, word_offset); // patch with written word + if (written_data != expected_written_data) { + throw std::invalid_argument{"logged written data of " + std::string(text) + + " doesn't differ from the logged read data only by the written word at access " + + std::to_string(access_to_report())}; + } + // NOLINTEND(bugprone-unchecked-optional-access) + // check proof if (m_verify_proofs) { auto proof = access.make_proof(m_root_hash); if (!proof.verify(m_hasher)) { @@ -277,43 +277,40 @@ class uarch_replay_state_access : public i_uarch_state_access&1]] - - local p = io.popen(lua_cmd .. lua_code) - local output = p:read(2000) - p:close() +do_test("dumped step log content should match", function(machine) + local log_type = { proofs = false, annotations = true } + local log = machine:log_uarch_step(log_type) + local temp_file = test_util.new_temp_file() + util.dump_log(log, temp_file) + local log_output = temp_file:read_all() + -- luacheck: push no max line length local expected_output = "begin step\n" .. " 1: read uarch.cycle@0x400008(4194312): 0x0(0)\n" .. " 2: read uarch.halt_flag@0x400000(4194304): 0x0(0)\n" @@ -698,17 +680,17 @@ do_test("dumped step log content should match", function() .. " 4: read memory@0x600000(6291456): 0x10089307b00513(4513027209561363)\n" .. " begin addi\n" .. " 5: read uarch.x@0x400018(4194328): 0x0(0)\n" - .. " 6: write uarch.x@0x400068(4194408): 0x0(0) -> 0x7b(123)\n" + .. " 6: write uarch.x@0x400068(4194408): 0x10050(65616) -> 0x7b(123)\n" .. " 7: write uarch.pc@0x400010(4194320): 0x600000(6291456) -> 0x600004(6291460)\n" .. " end addi\n" .. " 8: write uarch.cycle@0x400008(4194312): 0x0(0) -> 0x1(1)\n" .. "end step\n" - - print("Output of dump log:") + -- luacheck: pop + print("Log output:") print("--------------------------") - print(output) + print(log_output) print("--------------------------") - assert(output == expected_output, "Output does not match expected output:\n" .. expected_output) + assert(log_output == expected_output, "Output does not match expected output:\n" .. expected_output) end) print("\n\ntesting step and verification") @@ -756,7 +738,7 @@ do_test("Step log must contain conssitent data hashes", function(machine) -- ensure that verification fails with wrong written hash write_access.written_hash = wrong_hash _, err = pcall(module.machine.verify_uarch_step_log, log, {}) - assert(err:match("value being written to uarch.cycle does not hash to the logged written hash at access 8")) + assert(err:match("logged written data of uarch.cycle does not hash to the logged written hash at access 8")) end) do_test("step when uarch cycle is max", function(machine) @@ -1112,10 +1094,13 @@ do_test("Test unhappy paths of verify_uarch_step_state_transition", function(mac end assert_error("too few accesses in log", function(log) log.accesses = {} end) assert_error("expected access 1 to read uarch.uarch_cycle", function(log) log.accesses[1].address = 0 end) - assert_error("invalid log2_size", function(log) log.accesses[1].log2_size = 2 end) - assert_error("invalid log2_size", function(log) log.accesses[1].log2_size = 65 end) + assert_error( + "expected access 1 to read 2%^5 bytes from uarch.uarch_cycle", + function(log) log.accesses[1].log2_size = 2 end + ) + assert_error("target size cannot be greater than root size", function(log) log.accesses[1].log2_size = 65 end) assert_error("missing read uarch.uarch_cycle data at access 1", function(log) log.accesses[1].read = nil end) - assert_error("invalid read %(expected string with 2%^3 bytes%)", function(log) log.accesses[1].read = "\0" end) + assert_error("invalid read %(expected string with 2%^5 bytes%)", function(log) log.accesses[1].read = "\0" end) assert_error( "logged read data of uarch.uarch_cycle data does not hash to the logged read hash at access 1", function(log) log.accesses[1].read_hash = bad_hash end @@ -1127,12 +1112,12 @@ do_test("Test unhappy paths of verify_uarch_step_state_transition", function(mac ) assert_error("hash length must be 32 bytes", function(log) log.accesses[#log.accesses].written_hash = nil end) assert_error( - "invalid written %(expected string with 2%^3 bytes%)", + "invalid written %(expected string with 2%^5 bytes%)", function(log) log.accesses[#log.accesses].written = "\0" end ) assert_error( "logged written data of uarch.cycle does not hash to the logged written hash at access 7", - function(log) log.accesses[#log.accesses].written = "\0\0\0\0\0\0\0\0" end + function(log) log.accesses[#log.accesses].written = string.rep("\0", 32) end ) assert_error("Mismatch in root hash of access 1", function(log) log.accesses[1].sibling_hashes[1] = bad_hash end) end) @@ -1329,21 +1314,18 @@ do_test("Dump of log produced by send_cmio_response should match", function(mach local data = "0123456789" local reason = 7 local log = machine:log_send_cmio_response(reason, data, { proofs = true, annotations = true, large_data = false }) + -- luacheck: push no max line length local expected_dump = "begin send cmio response\n" .. " 1: read iflags.Y@0x2e8(744): 0x1a(26)\n" - .. ' 2: write cmio rx buffer@0x60000000(1610612736): hash:"4d9470a8"(2^4 bytes) -> ' - .. 'hash:"5d29fb90"(2^4 bytes)\n' + .. ' 2: write cmio rx buffer@0x60000000(1610612736): hash:"290decd9"(2^5 bytes) -> hash:"555b1f6d"(2^5 bytes)\n' .. " 3: write htif.fromhost@0x318(792): 0x0(0) -> 0x70000000a(30064771082)\n" .. " 4: read iflags.Y@0x2e8(744): 0x1a(26)\n" .. " 5: write iflags.Y@0x2e8(744): 0x1a(26) -> 0x18(24)\n" .. "end send cmio response\n" - local tmpname = os.tmpname() - local deleter = {} - setmetatable(deleter, { __gc = function() os.remove(tmpname) end }) - local tmp = io.open(tmpname, "w+") - util.dump_log(log, tmp) - tmp:seek("set", 0) - local actual_dump = tmp:read("*all") + -- luacheck: pop + local temp_file = test_util.new_temp_file() + util.dump_log(log, temp_file) + local actual_dump = temp_file:read_all() print("Output of log_send_cmio_response dump:") print("--------------------------") print(actual_dump) @@ -1353,10 +1335,10 @@ end) do_test("send_cmio_response with different data sizes", function(machine) local test_cases = { - { data_len = 1, write_len = 8 }, - { data_len = 8, write_len = 8 }, - { data_len = 9, write_len = 16 }, - { data_len = 16, write_len = 16 }, + { data_len = 1, write_len = 32 }, + { data_len = 32, write_len = 32 }, + { data_len = 33, write_len = 64 }, + { data_len = 64, write_len = 64 }, { data_len = 1 << 20, write_len = 1 << 20 }, { data_len = (1 << 20) + 1, write_len = 1 << 21 }, { data_len = 1 << 21, write_len = 1 << 21 }, @@ -1498,4 +1480,82 @@ local function test_cmio_buffers_backed_by_files() end test_cmio_buffers_backed_by_files() +local uarch_store_double_in_t0_to_t1 = { + 0x00533023, -- sd t0,0(t1) +} +test_util.make_do_test(build_machine, machine_type, { + uarch = { + ram = { image_filename = test_util.create_test_uarch_program(uarch_store_double_in_t0_to_t1) }, + }, +})("Log of word access unaligned to merkle tree leaf ", function(machine) + local leaf_size = 1 << cartesi.TREE_LOG2_WORD_SIZE + local word_size = 8 + local t0 = 5 -- x5 register + local t1 = t0 + 1 -- x6 register + local function make_leaf(w1, w2, w3, w4) + return string.rep(w1, word_size) + .. string.rep(w2, word_size) + .. string.rep(w3, word_size) + .. string.rep(w4, word_size) + end + -- write initial leaf data + local leaf_data = make_leaf("\x11", "\x22", "\x33", "\x44") + assert(#leaf_data == leaf_size) + local leaf_address = cartesi.UARCH_RAM_START_ADDRESS + (1 << cartesi.TREE_LOG2_WORD_SIZE) + machine:write_memory(leaf_address, leaf_data, leaf_size) + + -- step and log one instruction that stores the word in t0 to the address in t1 + -- returns raw and formatted log + local function log_step() + local log_type = { proofs = true, annotations = true } + local log = machine:log_uarch_step(log_type) + local temp_file = test_util.new_temp_file() + util.dump_log(log, temp_file) + return log, temp_file:read_all() + end + + -- write to the first word + machine:write_uarch_x(t1, leaf_address) + machine:write_uarch_x(t0, 0xaaaaaaaaaaaaaaaa) + local log, dump = log_step() + assert(dump:match("7: write memory@0x%x+%(%d+%): 0x1111111111111111%(%d+%) %-> 0xaaaaaaaaaaaaaaaa%(%d+%)")) + assert(log.accesses[7].read == leaf_data) + leaf_data = machine:read_memory(leaf_address, leaf_size) -- read and check written data + assert(leaf_data == make_leaf("\xaa", "\x22", "\x33", "\x44")) + assert(log.accesses[7].written == leaf_data) + + -- restart program and write to second leaf word + machine:write_uarch_pc(cartesi.UARCH_RAM_START_ADDRESS) + machine:write_uarch_x(t1, machine:read_uarch_x(t1) + word_size) + machine:write_uarch_x(t0, 0xbbbbbbbbbbbbbbbb) + log, dump = log_step() + assert(dump:match("7: write memory@0x%x+%(%d+%): 0x2222222222222222%(%d+%) %-> 0xbbbbbbbbbbbbbbbb%(%d+%)")) + assert(log.accesses[7].read == leaf_data) + leaf_data = machine:read_memory(leaf_address, leaf_size) + assert(leaf_data == make_leaf("\xaa", "\xbb", "\x33", "\x44")) + assert(log.accesses[7].written == leaf_data) + + -- restart program and write to third leaf word + machine:write_uarch_pc(cartesi.UARCH_RAM_START_ADDRESS) + machine:write_uarch_x(t1, machine:read_uarch_x(t1) + word_size) + machine:write_uarch_x(t0, 0xcccccccccccccccc) + log, dump = log_step() + assert(dump:match("7: write memory@0x%x+%(%d+%): 0x3333333333333333%(%d+%) %-> 0xcccccccccccccccc%(%d+%)")) + assert(log.accesses[7].read == leaf_data) + leaf_data = machine:read_memory(leaf_address, leaf_size) + assert(leaf_data == make_leaf("\xaa", "\xbb", "\xcc", "\x44")) + assert(log.accesses[7].written == leaf_data) + + -- restart program and write to fourth leaf word + machine:write_uarch_pc(cartesi.UARCH_RAM_START_ADDRESS) + machine:write_uarch_x(t1, machine:read_uarch_x(t1) + word_size) + machine:write_uarch_x(t0, 0xdddddddddddddddd) + log, dump = log_step() + assert(dump:match("7: write memory@0x%x+%(%d+%): 0x4444444444444444%(%d+%) %-> 0xdddddddddddddddd%(%d+%)")) + assert(log.accesses[7].read == leaf_data) + leaf_data = machine:read_memory(leaf_address, leaf_size) + assert(leaf_data == make_leaf("\xaa", "\xbb", "\xcc", "\xdd")) + assert(log.accesses[7].written == leaf_data) +end) + print("\n\nAll machine binding tests for type " .. machine_type .. " passed") diff --git a/tests/lua/mcycle-overflow.lua b/tests/lua/mcycle-overflow.lua index 78687ada8..bace9b50d 100755 --- a/tests/lua/mcycle-overflow.lua +++ b/tests/lua/mcycle-overflow.lua @@ -88,7 +88,9 @@ for _, proofs in ipairs({ true, false }) do assert(#log.accesses == 1) assert(log.accesses[1].type == "read") assert(log.accesses[1].address == cartesi.UARCH_SHADOW_START_ADDRESS + 8) -- address of uarch_cycle - assert(log.accesses[1].read == string.pack("J", MAX_UARCH_CYCLE)) + assert(#log.accesses[1].read == 32) + -- log data has 32 bytes. The uarch_cycle is the 2nd 8-byte word + assert(log.accesses[1].read:sub(9, 16) == string.pack("J", MAX_UARCH_CYCLE)) assert((log.accesses[1].sibling_hashes ~= nil) == proofs) end) end diff --git a/tests/misc/test-machine-c-api.cpp b/tests/misc/test-machine-c-api.cpp index 772f828db..726c613e2 100644 --- a/tests/misc/test-machine-c-api.cpp +++ b/tests/misc/test-machine-c-api.cpp @@ -44,9 +44,9 @@ #include #include #include -#include #include "test-utils.h" +#include "uarch-solidity-compat.h" // NOLINTBEGIN(cppcoreguidelines-avoid-do-while) @@ -718,7 +718,7 @@ BOOST_FIXTURE_TEST_CASE_NOLINT(get_proof_inconsistent_tree_test, ordinary_machin // merkle tree is always consistent now as it updates on access - error_code = cm_get_proof(_machine, 0, 3, &proof, &err_msg); + error_code = cm_get_proof(_machine, 0, CM_TREE_LOG2_PAGE_SIZE, &proof, &err_msg); BOOST_CHECK_EQUAL(error_code, CM_ERROR_OK); cm_delete_merkle_tree_proof(proof); } diff --git a/tests/misc/test-utils.h b/tests/misc/test-utils.h index e43ec651f..1e3ef050b 100644 --- a/tests/misc/test-utils.h +++ b/tests/misc/test-utils.h @@ -25,7 +25,7 @@ using hash_type = cartesi::keccak_256_hasher::hash_type; // Calculate root hash for data buffer of log2_size namespace detail { -constexpr int WORD_LOG2_SIZE = 3; +constexpr int WORD_LOG2_SIZE = 5; constexpr int MERKLE_PAGE_LOG2_SIZE = 12; constexpr int MERKLE_PAGE_SIZE = (UINT64_C(1) << MERKLE_PAGE_LOG2_SIZE); @@ -84,7 +84,7 @@ static hash_type calculate_proof_root_hash(const cm_merkle_tree_proof *proof) { } static hash_type calculate_emulator_hash(cm_machine *machine) { - cartesi::back_merkle_tree tree(64, 12, 3); + cartesi::back_merkle_tree tree(CM_TREE_LOG2_ROOT_SIZE, CM_TREE_LOG2_PAGE_SIZE, CM_TREE_LOG2_WORD_SIZE); std::string page; page.resize(detail::MERKLE_PAGE_SIZE); cm_memory_range_descr_array *mrds = nullptr; From e6bf24ed772c7c5061e8fd4973baddbca37548a7 Mon Sep 17 00:00:00 2001 From: Marcos Pernambuco Motta <1091485+mpernambuco@users.noreply.github.com> Date: Sat, 3 Aug 2024 21:10:42 -0300 Subject: [PATCH 3/3] feat: change layout of uarch json logs --- tests/lua/uarch-riscv-tests.lua | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/lua/uarch-riscv-tests.lua b/tests/lua/uarch-riscv-tests.lua index d2ef4fd6f..862b4cc24 100755 --- a/tests/lua/uarch-riscv-tests.lua +++ b/tests/lua/uarch-riscv-tests.lua @@ -389,19 +389,18 @@ local function write_access_to_log(access, out, indent, last) util.indentout(out, indent + 1, '"type": "%s",\n', access.type) util.indentout(out, indent + 1, '"address": %u,\n', access.address) util.indentout(out, indent + 1, '"log2_size": %u,\n', access.log2_size) + local read_value = "" -- Solidity JSON parser breaks, if this field is null + if access.read then read_value = util.hexstring(access.read) end + util.indentout(out, indent + 1, '"read_value": "%s",\n', read_value) + util.indentout(out, indent + 1, '"read_hash": "%s",\n', util.hexhash(access.read_hash)) + local written_value = "" + local written_hash = "" if access.type == "write" then - local value = "null" - if access.written then value = '"' .. util.hexstring(access.written) .. '"' end - util.indentout(out, indent + 1, '"value": %s,', value) - util.indentout(out, indent + 1, '"hash": "%s",', util.hexhash(access.written_hash)) - util.indentout(out, indent + 1, '"read_hash": "%s"', util.hexhash(access.read_hash)) - else - local value = "null" - if access.read then value = '"' .. util.hexstring(access.read) .. '"' end - util.indentout(out, indent + 1, '"value": %s,', value) - util.indentout(out, indent + 1, '"hash": "%s",', util.hexhash(access.read_hash)) - util.indentout(out, indent + 1, '"read_hash": "%s"', util.hexhash(access.read_hash)) + written_hash = util.hexhash(access.written_hash) + if access.written then written_value = util.hexstring(access.written) end end + util.indentout(out, indent + 1, '"written_value": "%s",\n', written_value) + util.indentout(out, indent + 1, '"written_hash": "%s"', written_hash) if access.sibling_hashes then out:write(",\n") write_sibling_hashes_to_log(access.sibling_hashes, out, indent + 2)