From c969bcd6a952256a3cf3cb8ceafec3957496ffdd Mon Sep 17 00:00:00 2001 From: karmacoma Date: Thu, 25 Jul 2024 17:45:08 -0700 Subject: [PATCH] feat: functional implementation of EXTCODEHASH and EXTCODECOPY (#328) --- src/halmos/sevm.py | 47 +++++++-- tests/expected/all.json | 83 +++++++++++++++ tests/regression/test/Extcodehash.t.sol | 133 ++++++++++++++++++++++++ 3 files changed, 256 insertions(+), 7 deletions(-) create mode 100644 tests/regression/test/Extcodehash.t.sol diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index 43dcf007..93044120 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -37,6 +37,7 @@ Steps = Dict[int, Dict[str, Any]] # execution tree EMPTY_BYTES = ByteVec() +EMPTY_KECCAK = con(0xC5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470) MAX_CALL_DEPTH = 1024 # TODO: make this configurable @@ -844,7 +845,7 @@ def sha3_data(self, data: Bytes) -> Word: ) sha3_expr = f_sha3(data) else: - sha3_expr = BitVec("f_sha3_0", BitVecSort256) + sha3_expr = EMPTY_KECCAK # assume hash values are sufficiently smaller than the uint max self.path.append(ULE(sha3_expr, 2**256 - 2**64)) @@ -2311,15 +2312,47 @@ def finalize(ex: Exec): ex.path.append(codesize > 0) ex.st.push(codesize) - # TODO: define f_extcodehash for known addresses in advance + elif opcode == EVM.EXTCODECOPY: + account: Address = uint160(ex.st.pop()) + loc: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") + offset: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") + size: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY size") + + if size > 0: + end_loc = loc + size + if end_loc > MAX_MEMORY_SIZE: + raise HalmosException("EXTCODECOPY > MAX_MEMORY_SIZE") + + # TODO: handle the case where account may alias multiple addresses + account_addr = self.resolve_address_alias(ex, account) + if account_addr is None: + # this could be unsound if the solver in resolve_address_alias + # returns unknown, meaning that there is in fact a must-alias + # address, but we didn't find it in time + warn( + f"EXTCODECOPY: unknown address {hexify(account)} " + "is assumed to have empty bytecode" + ) + + account_code: Contract = ex.code.get(account_addr) or ByteVec() + codeslice: ByteVec = account_code._code.slice( + offset, offset + size + ) + ex.st.memory.set_slice(loc, end_loc, codeslice) + elif opcode == EVM.EXTCODEHASH: - account = uint160(ex.st.pop()) - account_addr = self.resolve_address_alias(ex, account) + account_addr = uint160(ex.st.pop()) + alias_addr = self.resolve_address_alias(ex, account_addr) + addr = alias_addr if alias_addr is not None else account_addr + + account_code: Optional[Contract] = ex.code.get(addr, None) + codehash = ( - f_extcodehash(account_addr) - if account_addr is not None - else f_extcodehash(account) + f_extcodehash(addr) + if account_code is None + else ex.sha3_data(account_code._code.unwrap()) ) + ex.st.push(codehash) elif opcode == EVM.CODESIZE: diff --git a/tests/expected/all.json b/tests/expected/all.json index 577e488e..dcf051e5 100644 --- a/tests/expected/all.json +++ b/tests/expected/all.json @@ -786,6 +786,89 @@ "num_bounded_loops": null } ], + "test/Extcodehash.t.sol:ExtcodehashTest": [ + { + "name": "check_extcodehash_a1_eq_a2()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_a1_eq_directHash()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_a1_eq_runtimeCodeHash()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_a1_ne_b1()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_after_etch()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_empty()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_eq_directHash()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_unknown_addr_eq_0()", + "exitcode": 1, + "num_models": 1, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_unknown_addr_noteq_0()", + "exitcode": 1, + "num_models": 1, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + } + ], "test/Foundry.t.sol:DeepFailer": [ { "name": "check_fail_cheatcode()", diff --git a/tests/regression/test/Extcodehash.t.sol b/tests/regression/test/Extcodehash.t.sol new file mode 100644 index 00000000..9814185e --- /dev/null +++ b/tests/regression/test/Extcodehash.t.sol @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: AGPL-3.0 +pragma solidity >=0.8.0 <0.9.0; + +import "forge-std/Test.sol"; +import {SymTest} from "halmos-cheatcodes/SymTest.sol"; + +contract A {} + +contract B { + // make sure bytecode is different from A + function beep() public pure {} +} + +contract Empty { + // constructor returns 0-length runtime bytecode + constructor() { + assembly { + return(0, 0) + } + } +} + +contract ExtcodehashTest is Test, SymTest { + address a1; + bytes32 a1hash; + + function setUp() public { + address _a1 = address(new A()); + bytes32 _a1hash; + + assembly { + _a1hash := extcodehash(_a1) + } + + a1 = _a1; + a1hash = _a1hash; + } + + function check_extcodehash_a1_eq_a2() external { + address a2 = address(new A()); + bytes32 a2hash; + assembly { + a2hash := extcodehash(a2) + } + + // extcodehash(a1) == extcodehash(a2) + assertEq(a1hash, a2hash); + } + + function check_extcodehash_a1_ne_b1() external { + address b1 = address(new B()); + bytes32 b1hash; + assembly { + b1hash := extcodehash(b1) + } + + // extcodehash(a1) != extcodehash(b1) + assertNotEq(a1hash, b1hash); + } + + function check_extcodehash_a1_eq_directHash() external { + // extcodehash(a1) == keccak256(extcodecopy(a1)) + assertEq(a1hash, keccak256(a1.code)); + } + + function check_extcodehash_a1_eq_runtimeCodeHash() external { + assertEq(a1hash, keccak256(type(A).runtimeCode)); + } + + function check_extcodehash_eq_directHash() external { + uint256 thisCodeSize; + assembly { + thisCodeSize := codesize() + } + + bytes memory thisCode = new bytes(thisCodeSize); + bytes32 thisCodeHash; + assembly { + codecopy(add(thisCode, 0x20), 0, thisCodeSize) + thisCodeHash := extcodehash(address()) + } + + // extcodehash(address()) == keccak256(codecopy()) + assertEq(thisCodeHash, keccak256(thisCode)); + } + + function check_extcodehash_empty() external { + address emptyCodeAddr = address(new Empty()); + assertEq(emptyCodeAddr.code.length, 0, "Empty contract should have no code"); + + bytes32 codehash; + assembly { + codehash := extcodehash(emptyCodeAddr) + } + + assertEq(codehash, keccak256(""), "Expected codehash of the empty string"); + } + + /// yields a path where it is eq to 0, and a path where it is not + /// so we expect a counterexample for this and also its negation + function check_extcodehash_unknown_addr_eq_0() external { + bytes32 codehash; + assembly { + codehash := extcodehash(0x1337) + } + + assertEq(codehash, 0); + } + + /// yields a path where it is eq to 0, and a path where it is not + /// so we expect a counterexample for this and also its negation + function check_extcodehash_unknown_addr_noteq_0() external { + bytes32 codehash; + assembly { + codehash := extcodehash(0x1337) + } + + assertNotEq(codehash, 0); + } + + function check_extcodehash_after_etch() external { + address who = address(0x1337); + bytes memory code = svm.createBytes(42, "code"); + vm.etch(who, code); + + bytes32 codehash; + assembly { + codehash := extcodehash(who) + } + + assertEq(codehash, keccak256(code)); + } +}