From 6be42094a009e5cb8cdf3df5e2397b29341f1fd8 Mon Sep 17 00:00:00 2001 From: Ravyu Sivakumaran Date: Mon, 26 Aug 2024 18:17:42 +0000 Subject: [PATCH] flamenco, vm: (precise faulting) fix off-by-1 error when deducting CUs on faults to match Agave --- contrib/test/vm_interp-fixtures.list | 1 + src/flamenco/vm/fd_vm_interp_core.c | 127 +++++++++++++++++++++++++-- 2 files changed, 119 insertions(+), 9 deletions(-) diff --git a/contrib/test/vm_interp-fixtures.list b/contrib/test/vm_interp-fixtures.list index e69de29bb2..41145b8fbe 100644 --- a/contrib/test/vm_interp-fixtures.list +++ b/contrib/test/vm_interp-fixtures.list @@ -0,0 +1 @@ +dump/test-vectors/vm_interp/fixtures/interp_cu_off_by_one_1.fix \ No newline at end of file diff --git a/src/flamenco/vm/fd_vm_interp_core.c b/src/flamenco/vm/fd_vm_interp_core.c index 8905f68005..39eab05b93 100644 --- a/src/flamenco/vm/fd_vm_interp_core.c +++ b/src/flamenco/vm/fd_vm_interp_core.c @@ -663,11 +663,12 @@ FD_VM_INTERP_INSTR_END; FD_VM_INTERP_BRANCH_BEGIN(0x95) /* FD_SBPF_OP_EXIT */ - if( FD_UNLIKELY( !frame_cnt ) ) { - pc++; - pc0 = pc; /* Start a new linear segment */ - goto sigexit; /* Exit program */ - } + /* Agave JIT VM exit implementation analysis below. + + Agave references: + https://github.com/solana-labs/rbpf/blob/v0.8.5/src/interpreter.rs#L503-L509 + https://github.com/solana-labs/rbpf/blob/v0.8.5/src/jit.rs#L697-L702 */ + if( FD_UNLIKELY( !frame_cnt ) ) goto sigexit; /* Exit program */ frame_cnt--; reg[6] = shadow[ frame_cnt ].r6; reg[7] = shadow[ frame_cnt ].r7; @@ -812,9 +813,10 @@ instruction and the number of non-branching instructions that have not yet been reflected in ic and cu is: - pc - pc0 - ic_correction + pc - pc0 + 1 - ic_correction - as per the accounting described above. + as per the accounting described above. +1 to include the faulting + instruction itself. 
Note that, for a sigtext caused by a branch instruction, pc0==pc (from the BRANCH_END) and ic_correction==0 (from the BRANCH_BEGIN) @@ -824,7 +826,7 @@ sigsplit. */ #define FD_VM_INTERP_FAULT \ - ic_correction = pc - pc0 - ic_correction; \ + ic_correction = pc - pc0 + 1UL - ic_correction; \ ic += ic_correction; \ if ( FD_UNLIKELY( ic_correction > cu ) ) err = FD_VM_ERR_SIGCOST; \ cu -= fd_ulong_min( ic_correction, cu ) @@ -840,7 +842,7 @@ sigsegv: FD_VM_INTERP_FAULT; err = FD_VM_ERR_SIGSEGV; got sigcost: /* ic current */ cu = 0UL; err = FD_VM_ERR_SIGCOST; goto interp_halt; sigsyscall: /* ic current */ /* cu current */ /* err current */ goto interp_halt; sigfpe: FD_VM_INTERP_FAULT; err = FD_VM_ERR_SIGFPE; goto interp_halt; -sigexit: FD_VM_INTERP_FAULT; /* cu current */ /* err current */ goto interp_halt; +sigexit: /* ic current */ /* cu current */ /* err current */ goto interp_halt; #undef FD_VM_INTERP_FAULT @@ -870,3 +872,110 @@ sigexit: FD_VM_INTERP_FAULT; /* cu current */ /* err current */ goto # if defined(__GNUC__) # pragma GCC diagnostic pop # endif + +/* Agave/JIT CU model analysis (and why we are conformant!): + + The Agave JIT employs a similar strategy of accumulating instructions + in a linear run and processing them at the start of a new linear + run/branch (side note: the JIT treats the LDQ instruction as a "branch" + that jumps pc + 2). + + In what is assumed to be an act of register conservation, the JIT + uses a catch-all "instruction meter" (IM) register (REGISTER_INSTRUCTION_METER) + that represents two different interpretations of the question + "how many instructions can I execute?". + + The IM, depending on where we are in the execution, either represents: + 1. IM => The number of instructions remaining before exhausting CU + budget. This is analogous to vm->cu in our interpreter. + 2. IM' => The last pc you can execute in the current linear run before + exhausting CU budget. 
Mathematically, IM' = IM + pc0 + where pc0, just like our definition, is the start of the linear run. + + Note: IM' can go past the actual basic block/segment. In fact, + it typically does, and implies we can execute the full block without + exhausting CU budget (reminder that LDQ is treated as a branch). + + By default, the IM' form is used during execution. The IM form is used: + - (transiently) during the processing of a branch instruction + - in post-VM cleanup (updates EbpfVm::previous_instruction_meter). + + When a branch instruction is encountered, the JIT checks + for CU exhaustion with pc > IM', and throws an exception if so. This is valid, + because as described above, IM' is the largest PC you can reach. + + If we haven't exhausted our CU limit, it updates IM': + 1. IM = IM' - (pc + 1) # Note that IM' at this point is IM + pc0', + # where pc0' is the start of the current linear run. + 2. IM' = IM + pc0 # pc0 is the start of the new linear run (typically the target pc) + + Code (that does the above in one ALU instruction): + https://github.com/solana-labs/rbpf/blob/v0.8.5/src/jit.rs#L891 + + + ### How does this relate to our interpreter? + + This process is similar to FD_VM_INTERP_BRANCH_BEGIN. + We just deal with the IM form throughout (with vm->cu and ic_correction). + If we break down step 1 from above with what we know about IM and IM', + we get the following: + 1. IM = IM' - (pc + 1) + IM = (IM + pc0') - (pc + 1) + IM = IM + (pc0' - (pc + 1)) + IM = IM - ((pc + 1) - pc0') + IM = IM - ic_correction + Here, ((pc + 1) - pc0') is the number of instructions executed in the current + linear run. This is the same as our ic_correction(*) in FD_VM_INTERP_BRANCH_BEGIN. + + If we replace IM with cu, this effectively becomes the + cu -= ic_correction + line in FD_VM_INTERP_BRANCH_BEGIN. + + (*) Note: ic_correction (also) takes two forms. It is either the instruction + accumulator or the number of instructions executed in the current linear run. 
+ It (transiently) takes the latter form during FD_VM_INTERP_BRANCH_BEGIN and + FD_VM_INTERP_FAULT, and the former form otherwise. +*/ + +/* (WIP) Precise faulting and the Agave JIT: + + Since the cost model is a part of consensus, we need to conform with the Agave/JIT + cost model 1:1. To achieve that, our faulting model also needs to match precisely. This + section covers the various faults that the respective VMs implement and how they match. + + # Normal VM exit (sigexit): + VM exit instruction entrypoint: https://github.com/solana-labs/rbpf/blob/12237895305ab38514be865ebed6268553e4f589/src/jit.rs#L698-L708 + + Pseudocode (with FD semantics): + ``` + # pc is at the exit instruction + # pc0 is the start of the current linear run + if (frame_cnt == 0) { + goto sigexit; + } + ... + + sigexit: + if IM' <= pc { + goto sigcost; + } else { + goto interp_halt; + } + ``` + + Breaking down the IM' <= pc check: + - IM' = IM + pc0 + - pc = ic + pc0, where (ic + 1) is the number of instructions executed in the current linear run + + IM' <= pc + IM + pc0 <= ic + pc0 + IM <= ic + IM <= pc - pc0 + IM < pc - pc0 + 1 # all unsigned integers + IM < ic_correction + + This is analogous to the ic_correction>cu check in FD_VM_INTERP_BRANCH_BEGIN. + + # (TODO) Text Overrun (sigtext/sigsplit): + +*/