From b28572a95b369c9372b1216e4faa4b901f52bb55 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 22 Jul 2024 15:38:20 -0700 Subject: [PATCH 1/6] basic impl for extcodehash --- src/halmos/sevm.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index 43dcf007..c1cfc001 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -2313,13 +2313,15 @@ def finalize(ex: Exec): # TODO: define f_extcodehash for known addresses in advance elif opcode == EVM.EXTCODEHASH: - account = uint160(ex.st.pop()) - account_addr = self.resolve_address_alias(ex, account) + account_addr = self.resolve_address_alias(ex, uint160(ex.st.pop())) + account_code: Optional[Contract] = ex.code.get(account_addr, None) + codehash = ( f_extcodehash(account_addr) - if account_addr is not None - else f_extcodehash(account) + if account_code is None + else ex.sha3_data(account_code._code.unwrap()) ) + ex.st.push(codehash) elif opcode == EVM.CODESIZE: From 9cd94db4b5c49d7712e55530ac476ca0277890b2 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 22 Jul 2024 17:48:20 -0700 Subject: [PATCH 2/6] feat: implement EXTCODEHASH + EXTCODECOPY https://github.com/a16z/halmos/issues/324 --- src/halmos/sevm.py | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index c1cfc001..6ddd6e9d 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -37,6 +37,7 @@ Steps = Dict[int, Dict[str, Any]] # execution tree EMPTY_BYTES = ByteVec() +EMPTY_KECCAK = con(0xC5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470) MAX_CALL_DEPTH = 1024 # TODO: make this configurable @@ -844,7 +845,7 @@ def sha3_data(self, data: Bytes) -> Word: ) sha3_expr = f_sha3(data) else: - sha3_expr = BitVec("f_sha3_0", BitVecSort256) + sha3_expr = EMPTY_KECCAK # assume hash values are sufficiently smaller than the uint max self.path.append(ULE(sha3_expr, 2**256 - 2**64)) @@ -2311,13 +2312,35 @@ def finalize(ex: Exec): ex.path.append(codesize > 0) ex.st.push(codesize) - # TODO: define f_extcodehash for known addresses in advance - elif opcode == EVM.EXTCODEHASH: + elif opcode == EVM.EXTCODECOPY: account_addr = self.resolve_address_alias(ex, uint160(ex.st.pop())) - account_code: Optional[Contract] = ex.code.get(account_addr, None) + account_code: Contract = ( + ex.code.get(account_addr, None) or ByteVec() + ) + + loc: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") + offset: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") + size: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY size") + end_loc = loc + size + + if end_loc > MAX_MEMORY_SIZE: + raise HalmosException("EXTCODECOPY > MAX_MEMORY_SIZE") + + if size > 0: + codeslice: ByteVec = account_code._code.slice( + offset, offset + size + ) + ex.st.memory.set_slice(loc, end_loc, codeslice) + + elif opcode == EVM.EXTCODEHASH: + account_addr = uint160(ex.st.pop()) + alias_addr = self.resolve_address_alias(ex, account_addr) + addr = alias_addr if alias_addr is not None else account_addr + + account_code: Optional[Contract] = ex.code.get(addr, None) codehash = ( - f_extcodehash(account_addr) + f_extcodehash(addr) if account_code is None else ex.sha3_data(account_code._code.unwrap()) ) From 64a5ea14ec0436acafb163e512332cbb54b3b14b Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 22 Jul 2024 17:51:07 -0700 Subject: [PATCH 3/6] Add ExtcodehashTest --- tests/regression/test/Extcodehash.t.sol | 133 ++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 tests/regression/test/Extcodehash.t.sol diff --git a/tests/regression/test/Extcodehash.t.sol b/tests/regression/test/Extcodehash.t.sol new file mode 100644 index 00000000..9814185e --- /dev/null +++ b/tests/regression/test/Extcodehash.t.sol @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: AGPL-3.0 +pragma solidity >=0.8.0 <0.9.0; + +import "forge-std/Test.sol"; +import {SymTest} from "halmos-cheatcodes/SymTest.sol"; + +contract A {} + +contract B { + // make sure bytecode is different from A + function beep() public pure {} +} + +contract Empty { + // constructor returns 0-length runtime bytecode + constructor() { + assembly { + return(0, 0) + } + } +} + +contract ExtcodehashTest is Test, SymTest { + address a1; + bytes32 a1hash; + + function setUp() public { + address _a1 = address(new A()); + bytes32 _a1hash; + + assembly { + _a1hash := extcodehash(_a1) + } + + a1 = _a1; + a1hash = _a1hash; + } + + function check_extcodehash_a1_eq_a2() external { + address a2 = address(new A()); + bytes32 a2hash; + assembly { + a2hash := extcodehash(a2) + } + + // extcodehash(a1) == extcodehash(a2) + assertEq(a1hash, a2hash); + } + + function check_extcodehash_a1_ne_b1() external { + address b1 = address(new B()); + bytes32 b1hash; + assembly { + b1hash := extcodehash(b1) + } + + // extcodehash(a1) != extcodehash(b1) + assertNotEq(a1hash, b1hash); + } + + function check_extcodehash_a1_eq_directHash() external { + // extcodehash(a1) == keccak256(extcodecopy(a1)) + assertEq(a1hash, keccak256(a1.code)); + } + + function check_extcodehash_a1_eq_runtimeCodeHash() external { + assertEq(a1hash, keccak256(type(A).runtimeCode)); + } + + function check_extcodehash_eq_directHash() external { + uint256 thisCodeSize; + assembly { + thisCodeSize := codesize() + } + + bytes memory thisCode = new bytes(thisCodeSize); + bytes32 thisCodeHash; + assembly { + codecopy(add(thisCode, 0x20), 0, thisCodeSize) + thisCodeHash := extcodehash(address()) + } + + // extcodehash(address()) == keccak256(codecopy()) + assertEq(thisCodeHash, keccak256(thisCode)); + } + + function check_extcodehash_empty() external { + address emptyCodeAddr = address(new Empty()); + assertEq(emptyCodeAddr.code.length, 0, "Empty contract should have no code"); + + bytes32 codehash; + assembly { + codehash := extcodehash(emptyCodeAddr) + } + + assertEq(codehash, keccak256(""), "Expected codehash of the empty string"); + } + + /// yields a path where it is eq to 0, and a path where it is not + /// so we expect a counterexample for this and also its negation + function check_extcodehash_unknown_addr_eq_0() external { + bytes32 codehash; + assembly { + codehash := extcodehash(0x1337) + } + + assertEq(codehash, 0); + } + + /// yields a path where it is eq to 0, and a path where it is not + /// so we expect a counterexample for this and also its negation + function check_extcodehash_unknown_addr_noteq_0() external { + bytes32 codehash; + assembly { + codehash := extcodehash(0x1337) + } + + assertNotEq(codehash, 0); + } + + function check_extcodehash_after_etch() external { + address who = address(0x1337); + bytes memory code = svm.createBytes(42, "code"); + vm.etch(who, code); + + bytes32 codehash; + assembly { + codehash := extcodehash(who) + } + + assertEq(codehash, keccak256(code)); + } +} From ca9f79a4197b7dc4edf994a0b6cac5a86731bfb9 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Mon, 22 Jul 2024 17:51:17 -0700 Subject: [PATCH 4/6] add expected results for ExtcodehashTest --- tests/expected/all.json | 83 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tests/expected/all.json b/tests/expected/all.json index 571c73e2..aa1dba6e 100644 --- a/tests/expected/all.json +++ b/tests/expected/all.json @@ -786,6 +786,89 @@ "num_bounded_loops": null } ], + "test/Extcodehash.t.sol:ExtcodehashTest": [ + { + "name": "check_extcodehash_a1_eq_a2()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_a1_eq_directHash()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_a1_eq_runtimeCodeHash()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_a1_ne_b1()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_after_etch()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_empty()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_eq_directHash()", + "exitcode": 0, + "num_models": 0, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_unknown_addr_eq_0()", + "exitcode": 1, + "num_models": 1, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + }, + { + "name": "check_extcodehash_unknown_addr_noteq_0()", + "exitcode": 1, + "num_models": 1, + "models": null, + "num_paths": null, + "time": null, + "num_bounded_loops": null + } + ], "test/Foundry.t.sol:DeepFailer": [ { "name": "check_fail_cheatcode()", From 30f5b6a2c0acfc5da3ec003bebea8daff0e14cb1 Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 24 Jul 2024 11:11:07 -0700 Subject: [PATCH 5/6] warn if resolve_address_alias returns unknown --- src/halmos/sevm.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index 6ddd6e9d..5861857d 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -2313,10 +2313,20 @@ def finalize(ex: Exec): ex.st.push(codesize) elif opcode == EVM.EXTCODECOPY: - account_addr = self.resolve_address_alias(ex, uint160(ex.st.pop())) - account_code: Contract = ( - ex.code.get(account_addr, None) or ByteVec() - ) + account = uint160(ex.st.pop()) + + # TODO: handle the case where account may alias multiple addresses + account_addr = self.resolve_address_alias(ex, account) + if account_addr is None: + # this could be unsound if the solver in resolve_address_alias + # returns unknown, meaning that there is in fact a must-alias + # address, but we didn't find it in time + warn( + f"EXTCODECOPY: unknown address {hexify(account)} " + "is assumed to have empty bytecode" + ) + + account_code: Contract = ex.code.get(account_addr) or ByteVec() loc: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") offset: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") From 851c75e94d02ea3bdcec60ed5acbcad666737afc Mon Sep 17 00:00:00 2001 From: karmacoma Date: Wed, 24 Jul 2024 11:16:31 -0700 Subject: [PATCH 6/6] EXTCODECOPY: short-circuit if size == 0 --- src/halmos/sevm.py | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/src/halmos/sevm.py b/src/halmos/sevm.py index 5861857d..93044120 100644 --- a/src/halmos/sevm.py +++ b/src/halmos/sevm.py @@ -2313,30 +2313,28 @@ def finalize(ex: Exec): ex.st.push(codesize) elif opcode == EVM.EXTCODECOPY: - account = uint160(ex.st.pop()) - - # TODO: handle the case where account may alias multiple addresses - account_addr = self.resolve_address_alias(ex, account) - if account_addr is None: - # this could be unsound if the solver in resolve_address_alias - # returns unknown, meaning that there is in fact a must-alias - # address, but we didn't find it in time - warn( - f"EXTCODECOPY: unknown address {hexify(account)} " - "is assumed to have empty bytecode" - ) - - account_code: Contract = ex.code.get(account_addr) or ByteVec() - + account: Address = uint160(ex.st.pop()) loc: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") offset: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY offset") size: int = int_of(ex.st.pop(), "symbolic EXTCODECOPY size") - end_loc = loc + size - - if end_loc > MAX_MEMORY_SIZE: - raise HalmosException("EXTCODECOPY > MAX_MEMORY_SIZE") if size > 0: + end_loc = loc + size + if end_loc > MAX_MEMORY_SIZE: + raise HalmosException("EXTCODECOPY > MAX_MEMORY_SIZE") + + # TODO: handle the case where account may alias multiple addresses + account_addr = self.resolve_address_alias(ex, account) + if account_addr is None: + # this could be unsound if the solver in resolve_address_alias + # returns unknown, meaning that there is in fact a must-alias + # address, but we didn't find it in time + warn( + f"EXTCODECOPY: unknown address {hexify(account)} " + "is assumed to have empty bytecode" + ) + + account_code: Contract = ex.code.get(account_addr) or ByteVec() codeslice: ByteVec = account_code._code.slice( offset, offset + size )