From 930af13f609851081cd9942ce637de7ec3d40258 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 22 Aug 2023 16:57:06 -0500
Subject: [PATCH 01/42] make PassMemory mutability more fine-grained

Needed so that individual PassMemory fields can be mutated during the run/execute visitor pattern; previously, any such mutation required a mutable borrow of the entire PassMemory object.
---
 circuit_passes/src/bucket_interpreter/mod.rs  |  62 ++------
 .../src/bucket_interpreter/value.rs           |  29 ++--
 .../src/passes/conditional_flattening.rs      |  15 +-
 .../deterministic_subcomponent_invocation.rs  |  15 +-
 circuit_passes/src/passes/loop_unroll.rs      |  18 +--
 .../src/passes/mapped_to_indexed.rs           |  24 ++-
 circuit_passes/src/passes/memory.rs           | 138 +++++++++++-------
 circuit_passes/src/passes/mod.rs              |   2 +-
 circuit_passes/src/passes/simplification.rs   |  24 ++-
 .../src/passes/unknown_index_sanitization.rs  |  38 +++--
 10 files changed, 177 insertions(+), 188 deletions(-)
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index c5ff9627f..250da58dc 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -4,8 +4,6 @@ pub mod observer;
 pub(crate) mod operations;
 
 use circom_algebra::modular_arithmetic;
-use code_producers::components::TemplateInstanceIOMap;
-use code_producers::llvm_elements::IndexMapping;
 use compiler::intermediate_representation::{Instruction, InstructionList, InstructionPointer};
 use compiler::intermediate_representation::ir_interface::*;
 use compiler::num_bigint::BigInt;
@@ -15,17 +13,13 @@ use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::{JoinSemiLattice, Value};
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
+use crate::passes::memory::PassMemory;
 
 pub struct BucketInterpreter<'a> {
-    _scope: &'a String,
-    _prime: &'a String,
-    pub constant_fields: &'a Vec<String>,
     pub(crate) observer: &'a dyn InterpreterObserver,
-    io_map: &'a TemplateInstanceIOMap,
+    mem: &'a PassMemory,
+    scope: String,
     p: BigInt,
-    signal_index_mapping: &'a IndexMapping,
-    variables_index_mapping: &'a IndexMapping,
-    component_addr_index_mapping: &'a IndexMapping,
 }
 
 pub type R<'a> = (Option<Value>, Env<'a>);
@@ -47,26 +41,12 @@ impl JoinSemiLattice for R<'_> {
 }
 
 impl<'a> BucketInterpreter<'a> {
-    pub fn init(
-        scope: &'a String,
-        prime: &'a String,
-        constant_fields: &'a Vec<String>,
-        observer: &'a dyn InterpreterObserver,
-        io_map: &'a TemplateInstanceIOMap,
-        signal_index_mapping: &'a IndexMapping,
-        variables_index_mapping: &'a IndexMapping,
-        component_addr_index_mapping: &'a IndexMapping,
-    ) -> Self {
+    pub fn init(observer: &'a dyn InterpreterObserver, mem: &'a PassMemory, scope: String) -> Self {
         BucketInterpreter {
-            _scope: scope,
-            _prime: prime,
-            constant_fields,
             observer,
-            io_map,
-            p: UsefulConstants::new(prime).get_p().clone(),
-            signal_index_mapping,
-            variables_index_mapping,
-            component_addr_index_mapping,
+            mem,
+            scope: scope.clone(),
+            p: UsefulConstants::new(mem.get_prime()).get_p().clone(),
         }
     }
 
@@ -91,33 +71,19 @@ impl<'a> BucketInterpreter<'a> {
         match bucket.dest_address_type {
             AddressType::Variable => {
                 let idx = self.get_id_from_indexed_location(&bucket.dest, env);
-                let indices = self
-                    .variables_index_mapping
-                    .get(&idx)
-                    .expect(
-                        format!(
-                            "Could not get idx {idx} from mapping. Min key {:?}. Max key {:?}",
-                            self.variables_index_mapping.keys().min(),
-                            self.variables_index_mapping.keys().max()
-                        )
-                        .as_str(),
-                    )
-                    .clone();
-                for index in indices {
+                for index in self.mem.get_variables_index_mapping(&self.scope, &idx) {
                     vars.push(index);
                 }
             }
             AddressType::Signal => {
                 let idx = self.get_id_from_indexed_location(&bucket.dest, env);
-                let indices = self.signal_index_mapping[&idx].clone();
-                for index in indices {
+                for index in self.mem.get_signal_index_mapping(&self.scope, &idx) {
                     signals.push(index);
                 }
             }
             AddressType::SubcmpSignal { .. } => {
                 let idx = self.get_id_from_indexed_location(&bucket.dest, env);
-                let indices = self.component_addr_index_mapping[&idx].clone();
-                for index in indices {
+                for index in self.mem.get_component_addr_index_mapping(&self.scope, &idx) {
                     subcmps.push(index);
                 }
             }
@@ -219,14 +185,14 @@ impl<'a> BucketInterpreter<'a> {
     ) -> R<'env> {
         (
             Some(match bucket.parse_as {
+                ValueType::U32 => KnownU32(bucket.value),
                 ValueType::BigInt => {
-                    let constant = &self.constant_fields[bucket.value];
+                    let constant = self.mem.get_field_constant(bucket.value);
                     KnownBigInt(
                         BigInt::parse_bytes(constant.as_bytes(), 10)
                             .expect(format!("Cannot parse constant {}", constant).as_str()),
                     )
                 }
-                ValueType::U32 => KnownU32(bucket.value),
             }),
             env,
         )
@@ -289,7 +255,7 @@ impl<'a> BucketInterpreter<'a> {
                     LocationRule::Mapped { signal_code, indexes } => {
                         let mut acc_env = env;
                         let io_def =
-                            &self.io_map[&acc_env.get_subcmp_template_id(addr)][*signal_code];
+                            self.mem.get_iodef(&acc_env.get_subcmp_template_id(addr), signal_code);
                         let map_access = io_def.offset;
                         if indexes.len() > 0 {
                             let mut indexes_values = vec![];
@@ -381,7 +347,7 @@ impl<'a> BucketInterpreter<'a> {
                         let mut acc_env = env;
                         let name = Some(acc_env.get_subcmp_name(addr).clone());
                         let io_def =
-                            &self.io_map[&acc_env.get_subcmp_template_id(addr)][*signal_code];
+                            self.mem.get_iodef(&acc_env.get_subcmp_template_id(addr), signal_code);
                         let map_access = io_def.offset;
                         if indexes.len() > 0 {
                             let mut indexes_values = vec![];
diff --git a/circuit_passes/src/bucket_interpreter/value.rs b/circuit_passes/src/bucket_interpreter/value.rs
index 3d6b1dd44..6da450698 100644
--- a/circuit_passes/src/bucket_interpreter/value.rs
+++ b/circuit_passes/src/bucket_interpreter/value.rs
@@ -4,8 +4,8 @@ use compiler::num_bigint::BigInt;
 use compiler::num_traits::ToPrimitive;
 use compiler::intermediate_representation::new_id;
 use circom_algebra::modular_arithmetic;
-use circom_algebra::modular_arithmetic::ArithmeticError;
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
+use crate::passes::memory::PassMemory;
 
 pub trait JoinSemiLattice {
     fn join(&self, other: &Self) -> Self;
@@ -84,7 +84,7 @@ impl Value {
         }
     }
 
-    pub fn to_value_bucket(&self, constant_fields: &mut Vec<String>) -> ValueBucket {
+    pub fn to_value_bucket(&self, mem: &PassMemory) -> ValueBucket {
         match self {
             Unknown => panic!("Can't create a value bucket from an unknown value!"),
             KnownU32(n) => ValueBucket {
@@ -96,20 +96,15 @@ impl Value {
                 op_aux_no: 0,
                 value: *n,
             },
-            KnownBigInt(n) => {
-                let str_repr = n.to_string();
-                let idx = constant_fields.len();
-                constant_fields.push(str_repr);
-                ValueBucket {
-                    id: new_id(),
-                    source_file_id: None,
-                    line: 0,
-                    message_id: 0,
-                    parse_as: ValueType::BigInt,
-                    op_aux_no: 0,
-                    value: idx,
-                }
-            }
+            KnownBigInt(n) => ValueBucket {
+                id: new_id(),
+                source_file_id: None,
+                line: 0,
+                message_id: 0,
+                parse_as: ValueType::BigInt,
+                op_aux_no: 0,
+                value: mem.add_field_constant(n.to_string()),
+            },
         }
     }
 }
@@ -142,7 +137,7 @@ fn wrap_op_result(
     rhs: &Value,
     field: &BigInt,
     u32_op: impl Fn(&usize, &usize) -> usize,
-    bigint_op: impl Fn(&BigInt, &BigInt, &BigInt) -> Result<BigInt, ArithmeticError>,
+    bigint_op: impl Fn(&BigInt, &BigInt, &BigInt) -> Result<BigInt, modular_arithmetic::ArithmeticError>,
 ) -> Value {
     match (lhs, rhs) {
         (Unknown, _) => Unknown,
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 84e32ed5d..faba64d1e 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -11,14 +11,14 @@ use crate::passes::memory::PassMemory;
 
 pub struct ConditionalFlattening {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     replacements: RefCell<BTreeMap<BranchBucket, bool>>,
 }
 
 impl ConditionalFlattening {
     pub fn new(prime: &String) -> Self {
         ConditionalFlattening {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
     }
@@ -74,8 +74,7 @@ impl InterpreterObserver for ConditionalFlattening {
     }
 
     fn on_branch_bucket(&self, bucket: &BranchBucket, env: &Env) -> bool {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self);
         let (_, cond_result, _) = interpreter.execute_conditional_bucket(
             &bucket.cond,
             &bucket.if_branch,
@@ -112,16 +111,16 @@ impl CircuitTransformationPass for ConditionalFlattening {
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn transform_branch_bucket(&self, bucket: &BranchBucket) -> InstructionPointer {
diff --git a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
index 795c89b00..99b612143 100644
--- a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
+++ b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
@@ -11,14 +11,14 @@ use crate::passes::memory::PassMemory;
 
 pub struct DeterministicSubCmpInvokePass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     replacements: RefCell<BTreeMap<AddressType, StatusInput>>,
 }
 
 impl DeterministicSubCmpInvokePass {
     pub fn new(prime: &String) -> Self {
         DeterministicSubCmpInvokePass {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
     }
@@ -35,8 +35,7 @@ impl DeterministicSubCmpInvokePass {
         } = address_type
         {
             let env = env.clone();
-            let mem = self.memory.borrow();
-            let interpreter = mem.build_interpreter(self);
+            let interpreter = self.memory.build_interpreter(self);
             let (addr, env) = interpreter.execute_instruction(cmp_address, env, false);
             let addr = addr
                 .expect("cmp_address instruction in SubcmpSignal must produce a value!")
@@ -130,16 +129,16 @@ impl CircuitTransformationPass for DeterministicSubCmpInvokePass {
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn transform_address_type(&self, address: &AddressType) -> AddressType {
diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
index 8aed5c73b..eb242d103 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll.rs
@@ -13,21 +13,20 @@ use crate::passes::memory::PassMemory;
 
 pub struct LoopUnrollPass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     replacements: RefCell<BTreeMap<BucketId, InstructionPointer>>,
 }
 
 impl LoopUnrollPass {
     pub fn new(prime: &String) -> Self {
         LoopUnrollPass {
-            memory: PassMemory::new_cell(prime, String::from(""), Default::default()),
+            memory: PassMemory::new(prime, String::from(""), Default::default()),
             replacements: Default::default(),
         }
     }
 
     fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self);
         let mut block_body = vec![];
         let mut cond_result = Some(true);
         let mut env = env.clone();
@@ -55,8 +54,7 @@ impl LoopUnrollPass {
     // Will take the unrolled loop and interpretate it
     // checking if new loop buckets appear
     fn continue_inside(&self, bucket: &BlockBucket, env: &Env) {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self);
         interpreter.execute_block_bucket(bucket, env.clone(), true);
     }
 }
@@ -149,16 +147,16 @@ impl CircuitTransformationPass for LoopUnrollPass {
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn transform_loop_bucket(&self, bucket: &LoopBucket) -> InstructionPointer {
diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index 6e29072d0..794ad73cc 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -13,14 +13,14 @@ use crate::passes::memory::PassMemory;
 
 pub struct MappedToIndexedPass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     replacements: RefCell<BTreeMap<LocationRule, LocationRule>>,
 }
 
 impl MappedToIndexedPass {
     pub fn new(prime: &String) -> Self {
         MappedToIndexedPass {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
     }
@@ -32,8 +32,7 @@ impl MappedToIndexedPass {
         signal_code: usize,
         env: &Env,
     ) -> LocationRule {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self);
 
         let (resolved_addr, acc_env) =
             interpreter.execute_instruction(cmp_address, env.clone(), false);
@@ -44,7 +43,8 @@ impl MappedToIndexedPass {
 
         let mut acc_env = acc_env;
         let name = acc_env.get_subcmp_name(resolved_addr).clone();
-        let io_def = &mem.io_map[&acc_env.get_subcmp_template_id(resolved_addr)][signal_code];
+        let io_def =
+            self.memory.get_iodef(&acc_env.get_subcmp_template_id(resolved_addr), &signal_code);
         let map_access = io_def.offset;
         if indexes.len() > 0 {
             let mut indexes_values = vec![];
@@ -54,15 +54,13 @@ impl MappedToIndexedPass {
                 acc_env = new_env;
             }
             let offset = compute_offset(&indexes_values, &io_def.lengths);
-            let mut unused = vec![];
             LocationRule::Indexed {
-                location: KnownU32(map_access + offset).to_value_bucket(&mut unused).allocate(),
+                location: KnownU32(map_access + offset).to_value_bucket(&self.memory).allocate(),
                 template_header: Some(name),
             }
         } else {
-            let mut unused = vec![];
             LocationRule::Indexed {
-                location: KnownU32(map_access).to_value_bucket(&mut unused).allocate(),
+                location: KnownU32(map_access).to_value_bucket(&self.memory).allocate(),
                 template_header: Some(name),
             }
         }
@@ -172,7 +170,7 @@ impl CircuitTransformationPass for MappedToIndexedPass {
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     /*
@@ -196,11 +194,11 @@ impl CircuitTransformationPass for MappedToIndexedPass {
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self, template);
     }
 }
diff --git a/circuit_passes/src/passes/memory.rs b/circuit_passes/src/passes/memory.rs
index 55b8631b3..214682ccd 100644
--- a/circuit_passes/src/passes/memory.rs
+++ b/circuit_passes/src/passes/memory.rs
@@ -1,6 +1,7 @@
 use std::cell::RefCell;
 use std::collections::HashMap;
-use code_producers::components::TemplateInstanceIOMap;
+use std::ops::Range;
+use code_producers::components::{TemplateInstanceIOMap, IODef};
 use code_producers::llvm_elements::IndexMapping;
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
@@ -11,47 +12,46 @@ use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 
 pub struct PassMemory {
-    pub templates_library: TemplatesLibrary,
-    pub functions_library: FunctionsLibrary,
-    pub prime: String,
-    pub constant_fields: Vec<String>,
-    pub current_scope: String,
-    pub io_map: TemplateInstanceIOMap,
-    pub signal_index_mapping: HashMap<String, IndexMapping>,
-    pub variables_index_mapping: HashMap<String, IndexMapping>,
-    pub component_addr_index_mapping: HashMap<String, IndexMapping>,
+    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
+    templates_library: RefCell<TemplatesLibrary>,
+    functions_library: RefCell<FunctionsLibrary>,
+    constant_fields: RefCell<Vec<String>>,
+    current_scope: RefCell<String>,
+    io_map: RefCell<TemplateInstanceIOMap>,
+    signal_index_mapping: RefCell<HashMap<String, IndexMapping>>,
+    variables_index_mapping: RefCell<HashMap<String, IndexMapping>>,
+    component_addr_index_mapping: RefCell<HashMap<String, IndexMapping>>,
+    prime: String,
 }
 
 impl PassMemory {
-    pub fn new_cell(
-        prime: &String,
-        current_scope: String,
-        io_map: TemplateInstanceIOMap,
-    ) -> RefCell<Self> {
-        RefCell::new(PassMemory {
+    pub fn new(prime: &String, current_scope: String, io_map: TemplateInstanceIOMap) -> Self {
+        PassMemory {
+            prime: prime.to_string(),
+            current_scope: RefCell::new(current_scope),
+            io_map: RefCell::new(io_map),
+            constant_fields: Default::default(),
             templates_library: Default::default(),
             functions_library: Default::default(),
-            prime: prime.to_string(),
-            constant_fields: vec![],
-            current_scope,
-            io_map,
             signal_index_mapping: Default::default(),
             variables_index_mapping: Default::default(),
             component_addr_index_mapping: Default::default(),
-        })
+        }
     }
 
-    pub fn set_scope(&mut self, template: &TemplateCode) {
-        self.current_scope = template.header.clone();
+    pub fn set_scope(&self, template: &TemplateCode) {
+        self.current_scope.replace(template.header.clone());
     }
 
     pub fn run_template(&self, observer: &dyn InterpreterObserver, template: &TemplateCode) {
-        assert!(!self.current_scope.is_empty());
+        assert!(!self.current_scope.borrow().is_empty());
         if cfg!(debug_assertions) {
-            println!("Running template {}", self.current_scope);
+            println!("Running template {}", self.current_scope.borrow());
         }
         let interpreter = self.build_interpreter(observer);
-        let env = Env::new(&self.templates_library, &self.functions_library, self);
+        let lib_t = self.templates_library.borrow();
+        let lib_f = self.functions_library.borrow();
+        let env = Env::new(&lib_t, &lib_f, self);
         interpreter.execute_instructions(&template.body, env, true);
     }
 
@@ -59,46 +59,86 @@ impl PassMemory {
         &'a self,
         observer: &'a dyn InterpreterObserver,
     ) -> BucketInterpreter {
-        self.build_interpreter_with_scope(observer, &self.current_scope)
+        self.build_interpreter_with_scope(observer, self.current_scope.borrow().to_string())
     }
 
     fn build_interpreter_with_scope<'a>(
         &'a self,
         observer: &'a dyn InterpreterObserver,
-        scope: &'a String,
+        scope: String,
     ) -> BucketInterpreter {
-        BucketInterpreter::init(
-            scope,
-            &self.prime,
-            &self.constant_fields,
-            observer,
-            &self.io_map,
-            &self.signal_index_mapping[scope],
-            &self.variables_index_mapping[scope],
-            &self.component_addr_index_mapping[scope],
-        )
+        BucketInterpreter::init(observer, self, scope)
     }
 
-    pub fn add_template(&mut self, template: &TemplateCode) {
-        self.templates_library.insert(template.header.clone(), (*template).clone());
+    pub fn add_template(&self, template: &TemplateCode) {
+        self.templates_library.borrow_mut().insert(template.header.clone(), (*template).clone());
     }
 
-    pub fn add_function(&mut self, function: &FunctionCode) {
-        self.functions_library.insert(function.header.clone(), (*function).clone());
+    pub fn add_function(&self, function: &FunctionCode) {
+        self.functions_library.borrow_mut().insert(function.header.clone(), (*function).clone());
     }
 
-    pub fn fill_from_circuit(&mut self, circuit: &Circuit) {
+    pub fn fill_from_circuit(&self, circuit: &Circuit) {
         for template in &circuit.templates {
             self.add_template(template);
         }
         for function in &circuit.functions {
             self.add_function(function);
         }
-        self.constant_fields = circuit.llvm_data.field_tracking.clone();
-        self.io_map = circuit.llvm_data.io_map.clone();
-        self.variables_index_mapping = circuit.llvm_data.variable_index_mapping.clone();
-        self.signal_index_mapping = circuit.llvm_data.signal_index_mapping.clone();
-        self.component_addr_index_mapping = circuit.llvm_data.component_index_mapping.clone();
+        self.constant_fields.replace(circuit.llvm_data.field_tracking.clone());
+        self.io_map.replace(circuit.llvm_data.io_map.clone());
+        self.variables_index_mapping.replace(circuit.llvm_data.variable_index_mapping.clone());
+        self.signal_index_mapping.replace(circuit.llvm_data.signal_index_mapping.clone());
+        self.component_addr_index_mapping
+            .replace(circuit.llvm_data.component_index_mapping.clone());
+    }
+
+    pub fn get_prime(&self) -> &String {
+        &self.prime
+    }
+
+    pub fn get_field_constant(&self, index: usize) -> String {
+        self.constant_fields.borrow()[index].clone()
+    }
+
+    pub fn get_field_constants_clone(&self) -> Vec<String> {
+        self.constant_fields.borrow().clone()
+    }
+
+    /// Stores a new constant and returns its index
+    pub fn add_field_constant(&self, new_value: String) -> usize {
+        let mut temp = self.constant_fields.borrow_mut();
+        let idx = temp.len();
+        temp.push(new_value);
+        idx
+    }
+
+    pub fn get_iodef(&self, template_id: &usize, signal_code: &usize) -> IODef {
+        self.io_map.borrow()[template_id][*signal_code].clone()
+    }
+
+    pub fn get_signal_index_mapping(&self, scope: &String, index: &usize) -> Range<usize> {
+        self.signal_index_mapping.borrow()[scope][index].clone()
+    }
+
+    pub fn get_current_scope_signal_index_mapping(&self, index: &usize) -> Range<usize> {
+        self.get_signal_index_mapping(&self.current_scope.borrow(), index)
+    }
+
+    pub fn get_variables_index_mapping(&self, scope: &String, index: &usize) -> Range<usize> {
+        self.variables_index_mapping.borrow()[scope][index].clone()
+    }
+
+    pub fn get_current_scope_variables_index_mapping(&self, index: &usize) -> Range<usize> {
+        self.get_variables_index_mapping(&self.current_scope.borrow(), index)
+    }
+
+    pub fn get_component_addr_index_mapping(&self, scope: &String, index: &usize) -> Range<usize> {
+        self.component_addr_index_mapping.borrow()[scope][index].clone()
+    }
+
+    pub fn get_current_scope_component_addr_index_mapping(&self, index: &usize) -> Range<usize> {
+        self.get_component_addr_index_mapping(&self.current_scope.borrow(), index)
     }
 }
 
@@ -108,6 +148,6 @@ impl ContextSwitcher for PassMemory {
         interpreter: &'a BucketInterpreter<'a>,
         scope: &'a String,
     ) -> BucketInterpreter<'a> {
-        self.build_interpreter_with_scope(interpreter.observer, scope)
+        self.build_interpreter_with_scope(interpreter.observer, scope.to_string())
     }
 }
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index 7f8db765d..906f8a974 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -14,12 +14,12 @@ use crate::passes::checks::assert_unique_ids_in_circuit;
 
 mod conditional_flattening;
 mod loop_unroll;
-mod memory;
 mod simplification;
 mod deterministic_subcomponent_invocation;
 mod mapped_to_indexed;
 mod unknown_index_sanitization;
 mod checks;
+pub(crate) mod memory;
 
 macro_rules! pre_hook {
     ($name: ident, $bucket_ty: ty) => {
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index 4868231da..3fab2932e 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -12,7 +12,7 @@ use crate::passes::memory::PassMemory;
 
 pub struct SimplificationPass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     compute_replacements: RefCell<BTreeMap<ComputeBucket, Value>>,
     call_replacements: RefCell<BTreeMap<CallBucket, Value>>,
 }
@@ -20,7 +20,7 @@ pub struct SimplificationPass {
 impl SimplificationPass {
     pub fn new(prime: &String) -> Self {
         SimplificationPass {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             compute_replacements: Default::default(),
             call_replacements: Default::default(),
         }
@@ -42,8 +42,7 @@ impl InterpreterObserver for SimplificationPass {
 
     fn on_compute_bucket(&self, bucket: &ComputeBucket, env: &Env) -> bool {
         let env = env.clone();
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self);
         let (eval, _) = interpreter.execute_compute_bucket(bucket, env, false);
         let eval = eval.expect("Compute bucket must produce a value!");
         if !eval.is_unknown() {
@@ -83,8 +82,7 @@ impl InterpreterObserver for SimplificationPass {
 
     fn on_call_bucket(&self, bucket: &CallBucket, env: &Env) -> bool {
         let env = env.clone();
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self);
         let (eval, _) = interpreter.execute_call_bucket(bucket, env, false);
         if let Some(eval) = eval {
             // Call buckets may not return a value directly
@@ -123,13 +121,12 @@ impl CircuitTransformationPass for SimplificationPass {
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn transform_compute_bucket(&self, bucket: &ComputeBucket) -> InstructionPointer {
         if let Some(value) = self.compute_replacements.borrow().get(&bucket) {
-            let constant_fields = &mut self.memory.borrow_mut().constant_fields;
-            return value.to_value_bucket(constant_fields).allocate();
+            return value.to_value_bucket(&self.memory).allocate();
         }
         ComputeBucket {
             id: new_id(),
@@ -145,8 +142,7 @@ impl CircuitTransformationPass for SimplificationPass {
 
     fn transform_call_bucket(&self, bucket: &CallBucket) -> InstructionPointer {
         if let Some(value) = self.call_replacements.borrow().get(&bucket) {
-            let constant_fields = &mut self.memory.borrow_mut().constant_fields;
-            return value.to_value_bucket(constant_fields).allocate();
+            return value.to_value_bucket(&self.memory).allocate();
         }
         CallBucket {
             id: new_id(),
@@ -163,11 +159,11 @@ impl CircuitTransformationPass for SimplificationPass {
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self, template);
     }
 }
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index 68d92da9d..d4762d239 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -83,7 +83,7 @@ impl<'a> ZeroingInterpreter<'a> {
 
 pub struct UnknownIndexSanitizationPass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     load_replacements: RefCell<BTreeMap<LoadBucket, Range<usize>>>,
     store_replacements: RefCell<BTreeMap<StoreBucket, Range<usize>>>,
 }
@@ -94,7 +94,7 @@ pub struct UnknownIndexSanitizationPass {
 impl UnknownIndexSanitizationPass {
     pub fn new(prime: &String) -> Self {
         UnknownIndexSanitizationPass {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             load_replacements: Default::default(),
             store_replacements: Default::default(),
         }
@@ -106,16 +106,6 @@ impl UnknownIndexSanitizationPass {
         location: &LocationRule,
         env: &Env,
     ) -> Range<usize> {
-        let mem = self.memory.borrow();
-        let interpreter = ZeroingInterpreter::init(&mem.prime, &mem.constant_fields);
-        let current_scope = &mem.current_scope;
-
-        let mapping = match address {
-            AddressType::Variable => &mem.variables_index_mapping[current_scope],
-            AddressType::Signal => &mem.signal_index_mapping[current_scope],
-            AddressType::SubcmpSignal { .. } => &mem.component_addr_index_mapping[current_scope],
-        };
-
         /*
          * We assume locations are of the form:
          *      (base_offset + (mul_offset * UNKNOWN))
@@ -126,17 +116,25 @@ impl UnknownIndexSanitizationPass {
          * a similar pattern that is also handled here.
          */
         match location {
+            LocationRule::Mapped { .. } => unreachable!(),
             LocationRule::Indexed { location, .. } => {
+                let mem = &self.memory;
+                let constant_fields = mem.get_field_constants_clone();
+                let interpreter = ZeroingInterpreter::init(mem.get_prime(), &constant_fields);
                 let (res, _) = interpreter.execute_instruction(location, env.clone());
 
                 let offset = match res {
                     Some(KnownU32(base)) => base,
                     _ => unreachable!(),
                 };
-
-                mapping[&offset].clone()
+                match address {
+                    AddressType::Variable => mem.get_current_scope_variables_index_mapping(&offset),
+                    AddressType::Signal => mem.get_current_scope_signal_index_mapping(&offset),
+                    AddressType::SubcmpSignal { .. } => {
+                        mem.get_current_scope_component_addr_index_mapping(&offset)
+                    }
+                }
             }
-            LocationRule::Mapped { .. } => unreachable!(),
         }
     }
 
@@ -146,7 +144,7 @@ impl UnknownIndexSanitizationPass {
         location: &LocationRule,
         env: &Env,
     ) -> bool {
-        let mem = self.memory.borrow();
+        let mem = &self.memory;
         let interpreter = mem.build_interpreter(self);
 
         let resolved_addr = match location {
@@ -291,15 +289,15 @@ impl CircuitTransformationPass for UnknownIndexSanitizationPass {
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self, template);
     }
 }

From a5946dd90c12cba49b254879f81db3965d44f8a8 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 22 Aug 2023 17:06:32 -0500
Subject: [PATCH 02/42] code cleanup

---
 circuit_passes/src/bucket_interpreter/env.rs | 2 +-
 circuit_passes/src/bucket_interpreter/mod.rs | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/env.rs b/circuit_passes/src/bucket_interpreter/env.rs
index f94292b46..bfc7e4b60 100644
--- a/circuit_passes/src/bucket_interpreter/env.rs
+++ b/circuit_passes/src/bucket_interpreter/env.rs
@@ -222,7 +222,7 @@ impl<'a> Env<'a> {
         count: usize,
         template_id: usize,
     ) -> Self {
-        let number_of_inputs = { self.templates_library[name].number_of_inputs };
+        let number_of_inputs = self.templates_library[name].number_of_inputs;
         let mut copy = self;
         for i in base_index..(base_index + count) {
             copy.subcmps.insert(i, SubcmpEnv::new(number_of_inputs, name, template_id));
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 250da58dc..c463f62e9 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -435,11 +435,7 @@ impl<'a> BucketInterpreter<'a> {
             env = new_env;
             args.push(value.expect("Function argument must produce a value!"));
         }
-
-        let any_unknown = args.iter().any(|v| v.is_unknown());
-
-        //let result = env.run_function(&bucket.symbol, self, args, observe);
-        let result = if any_unknown {
+        let result = if args.iter().any(|v| v.is_unknown()) {
             Unknown
         } else {
             env.run_function(&bucket.symbol, self, args, observe)

From 0bd386e8e75e04df5c96d9a2e8dd91ec2f452b91 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 23 Aug 2023 14:27:18 -0500
Subject: [PATCH 03/42] implement loop body extraction for very basic case

---
 circuit_passes/src/bucket_interpreter/env.rs  |  28 ++-
 circuit_passes/src/passes/loop_unroll.rs      | 176 +++++++++++++++--
 circuit_passes/src/passes/memory.rs           |  20 +-
 circuit_passes/src/passes/mod.rs              |   8 +-
 code_producers/src/llvm_elements/fr.rs        |  18 ++
 code_producers/src/llvm_elements/functions.rs | 187 +++++++++++++++---
 code_producers/src/llvm_elements/mod.rs       |  40 +++-
 code_producers/src/llvm_elements/stdlib.rs    |   2 +
 code_producers/src/llvm_elements/template.rs  | 181 ++++++++---------
 compiler/src/circuit_design/circuit.rs        |  50 ++++-
 .../call_bucket.rs                            |  99 ++++++----
 11 files changed, 597 insertions(+), 212 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/env.rs b/circuit_passes/src/bucket_interpreter/env.rs
index bfc7e4b60..81ae2a752 100644
--- a/circuit_passes/src/bucket_interpreter/env.rs
+++ b/circuit_passes/src/bucket_interpreter/env.rs
@@ -1,3 +1,4 @@
+use std::cell::Ref;
 use std::collections::HashMap;
 use std::fmt::{Display, Formatter};
 use compiler::circuit_design::function::FunctionCode;
@@ -16,6 +17,11 @@ pub trait ContextSwitcher {
     ) -> BucketInterpreter<'a>;
 }
 
+pub trait LibraryAccess {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode>;
+    fn get_template(&self, name: &String) -> Ref<TemplateCode>;
+}
+
 impl<L: JoinSemiLattice + Clone> JoinSemiLattice for HashMap<usize, L> {
     fn join(&self, other: &Self) -> Self {
         let mut new: HashMap<usize, L> = Default::default();
@@ -97,8 +103,7 @@ pub struct Env<'a> {
     vars: HashMap<usize, Value>,
     signals: HashMap<usize, Value>,
     subcmps: HashMap<usize, SubcmpEnv<'a>>,
-    templates_library: &'a TemplatesLibrary,
-    functions_library: &'a FunctionsLibrary,
+    libs: &'a dyn LibraryAccess,
     context_switcher: &'a dyn ContextSwitcher,
 }
 
@@ -113,17 +118,12 @@ impl Display for Env<'_> {
 }
 
 impl<'a> Env<'a> {
-    pub fn new(
-        templates_library: &'a TemplatesLibrary,
-        functions_library: &'a FunctionsLibrary,
-        context_switcher: &'a dyn ContextSwitcher,
-    ) -> Self {
+    pub fn new(libs: &'a dyn LibraryAccess, context_switcher: &'a dyn ContextSwitcher) -> Self {
         Env {
             vars: Default::default(),
             signals: Default::default(),
             subcmps: Default::default(),
-            templates_library,
-            functions_library,
+            libs,
             context_switcher,
         }
     }
@@ -222,7 +222,7 @@ impl<'a> Env<'a> {
         count: usize,
         template_id: usize,
     ) -> Self {
-        let number_of_inputs = self.templates_library[name].number_of_inputs;
+        let number_of_inputs = self.libs.get_template(name).number_of_inputs;
         let mut copy = self;
         for i in base_index..(base_index + count) {
             copy.subcmps.insert(i, SubcmpEnv::new(number_of_inputs, name, template_id));
@@ -240,9 +240,8 @@ impl<'a> Env<'a> {
         if cfg!(debug_assertions) {
             println!("Running function {}", name);
         }
-        let code = &self.functions_library[name].body;
-        let mut function_env =
-            Env::new(self.templates_library, self.functions_library, self.context_switcher);
+        let code = &self.libs.get_function(name).body;
+        let mut function_env = Env::new(self.libs, self.context_switcher);
         for (id, arg) in args.iter().enumerate() {
             function_env = function_env.set_var(id, arg.clone());
         }
@@ -260,8 +259,7 @@ impl<'a> Env<'a> {
             vars: self.vars.join(&other.vars),
             signals: self.signals.join(&other.signals),
             subcmps: self.subcmps.join(&other.subcmps),
-            templates_library: self.templates_library,
-            functions_library: self.functions_library,
+            libs: self.libs,
             context_switcher: self.context_switcher,
         }
     }
diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
index eb242d103..19d9eb3dc 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll.rs
@@ -1,7 +1,12 @@
 use std::cell::RefCell;
 use std::collections::BTreeMap;
+use std::vec;
+use code_producers::llvm_elements::stdlib::GENERATED_FN_PREFIX;
+use code_producers::llvm_elements::fr::FR_IDENTITY_ARR_0_PTR;
+use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
+use compiler::hir::very_concrete_program::Param;
 use compiler::intermediate_representation::{
     BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
 };
@@ -15,6 +20,7 @@ pub struct LoopUnrollPass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
     replacements: RefCell<BTreeMap<BucketId, InstructionPointer>>,
+    new_functions: RefCell<Vec<FunctionCode>>,
 }
 
 impl LoopUnrollPass {
@@ -22,33 +28,161 @@ impl LoopUnrollPass {
         LoopUnrollPass {
             memory: PassMemory::new(prime, String::from(""), Default::default()),
             replacements: Default::default(),
+            new_functions: Default::default(),
         }
     }
 
+    fn extract_body(&self, bucket: &LoopBucket) -> String {
+        // Copy loop body and add a "return void" at the end
+        let mut new_body = vec![];
+        for s in &bucket.body {
+            let mut copy = s.clone();
+            copy.update_id();
+            new_body.push(copy);
+        }
+        new_body.push(
+            ReturnBucket {
+                id: new_id(),
+                source_file_id: bucket.source_file_id,
+                line: bucket.line,
+                message_id: bucket.message_id,
+                with_size: usize::MAX, // size > 1 will produce "return void" LLVM instruction
+                value: NopBucket { id: new_id() }.allocate(),
+            }
+            .allocate(),
+        );
+        // Create new function to hold the copied body
+        let name = format!("{}loop.body.{}", GENERATED_FN_PREFIX, new_id());
+        let new_func = Box::new(FunctionCodeInfo {
+            source_file_id: bucket.source_file_id,
+            line: bucket.line,
+            name: name.clone(),
+            header: name.clone(),
+            body: new_body,
+            params: vec![
+                Param { name: String::from("signals"), length: vec![0] },
+                Param { name: String::from("lvars"), length: vec![0] },
+            ],
+            returns: vec![0], // this will produce void return type on the function
+            ..FunctionCodeInfo::default()
+        });
+        self.memory.add_function(&new_func);
+        self.new_functions.borrow_mut().push(new_func);
+        //
+        name
+    }
+
     fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
-        let interpreter = self.memory.build_interpreter(self);
+        // Compute loop iteration count. If unknown, return immediately.
+        let loop_count;
+        {
+            let mut iters = 0;
+            let interpreter = self.memory.build_interpreter(self);
+            let mut inner_env = env.clone();
+            loop {
+                let (_, cond, new_env) =
+                    interpreter.execute_loop_bucket_once(bucket, inner_env, false);
+                match cond {
+                    // If the conditional becomes unknown just give up.
+                    None => return (None, 0),
+                    // When conditional becomes `false`, iteration count is complete.
+                    Some(false) => break,
+                    // Otherwise, continue counting.
+                    Some(true) => iters += 1,
+                }
+                inner_env = new_env;
+            }
+            loop_count = iters;
+        }
+
+        // If the loop body contains more than one instruction, extract it into a
+        // new function and generate 'loop_count' number of calls to that function.
+        // Otherwise, just duplicate the body 'loop_count' number of times.
         let mut block_body = vec![];
-        let mut cond_result = Some(true);
-        let mut env = env.clone();
-        let mut iters = 0;
-        while cond_result.unwrap() {
-            let (_, new_cond, new_env) = interpreter.execute_loop_bucket_once(bucket, env, false);
-            if new_cond.is_none() {
-                return (None, 0); // If the conditional becomes Unknown just give up.
+        match &bucket.body[..] {
+            [a] => {
+                for _ in 0..loop_count {
+                    let mut copy = a.clone();
+                    copy.update_id();
+                    block_body.push(copy);
+                }
             }
-            cond_result = new_cond;
-            env = new_env;
-            if let Some(true) = new_cond {
-                iters += 1;
-                for inst in &bucket.body {
-                    block_body.push(inst.clone());
+            b => {
+                assert!(b.len() > 1);
+                //
+                //TODO: If any subcmps are used inside the loop body, add an additional '[0 x i256]*' parameter on
+                //  the new function for each one that is used and pass the arena of each into the function call.
+                //
+                //TODO: Any value indexed by a variable that changes from one loop iteration to another needs to
+                //  be indexed outside of the function and then have just that pointer passed into the function.
+                //
+                let name = self.extract_body(bucket);
+                for _ in 0..loop_count {
+                    block_body.push(
+                        CallBucket {
+                            id: new_id(),
+                            source_file_id: bucket.source_file_id,
+                            line: bucket.line,
+                            message_id: bucket.message_id,
+                            symbol: name.clone(),
+                            return_info: ReturnType::Intermediate { op_aux_no: 0 },
+                            arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+                            argument_types: vec![], // LLVM IR generation doesn't use this field
+                            arguments: vec![
+                                // Parameter for signals/arena
+                                LoadBucket {
+                                    id: new_id(),
+                                    source_file_id: bucket.source_file_id,
+                                    line: bucket.line,
+                                    message_id: bucket.message_id,
+                                    address_type: AddressType::Signal,
+                                    src: LocationRule::Indexed {
+                                        location: ValueBucket {
+                                            id: new_id(),
+                                            source_file_id: bucket.source_file_id,
+                                            line: bucket.line,
+                                            message_id: bucket.message_id,
+                                            parse_as: ValueType::U32,
+                                            op_aux_no: 0,
+                                            value: 0,
+                                        }
+                                        .allocate(),
+                                        template_header: None,
+                                    },
+                                    bounded_fn: Some(String::from(FR_IDENTITY_ARR_0_PTR)),
+                                }
+                                .allocate(),
+                                // Parameter for local vars
+                                LoadBucket {
+                                    id: new_id(),
+                                    source_file_id: bucket.source_file_id,
+                                    line: bucket.line,
+                                    message_id: bucket.message_id,
+                                    address_type: AddressType::Variable,
+                                    src: LocationRule::Indexed {
+                                        location: ValueBucket {
+                                            id: new_id(),
+                                            source_file_id: bucket.source_file_id,
+                                            line: bucket.line,
+                                            message_id: bucket.message_id,
+                                            parse_as: ValueType::U32,
+                                            op_aux_no: 0,
+                                            value: 0,
+                                        }
+                                        .allocate(),
+                                        template_header: None,
+                                    },
+                                    bounded_fn: Some(String::from(FR_IDENTITY_ARR_0_PTR)),
+                                }
+                                .allocate(),
+                            ],
+                        }
+                        .allocate(),
+                    );
                 }
             }
         }
-        for inst in &mut block_body {
-            inst.update_id();
-        }
-        (Some(block_body), iters)
+        (Some(block_body), loop_count)
     }
 
     // Will take the unrolled loop and interpretate it
@@ -150,6 +284,12 @@ impl CircuitTransformationPass for LoopUnrollPass {
         self.memory.fill_from_circuit(circuit);
     }
 
+    fn post_hook_circuit(&self, cir: &mut Circuit) {
+        for f in self.new_functions.borrow().iter() {
+            cir.functions.push(self.transform_function(&f));
+        }
+    }
+
     fn pre_hook_template(&self, template: &TemplateCode) {
         self.memory.set_scope(template);
         self.memory.run_template(self, template);
diff --git a/circuit_passes/src/passes/memory.rs b/circuit_passes/src/passes/memory.rs
index 214682ccd..eeedb73e1 100644
--- a/circuit_passes/src/passes/memory.rs
+++ b/circuit_passes/src/passes/memory.rs
@@ -1,4 +1,4 @@
-use std::cell::RefCell;
+use std::cell::{RefCell, Ref};
 use std::collections::HashMap;
 use std::ops::Range;
 use code_producers::components::{TemplateInstanceIOMap, IODef};
@@ -7,7 +7,9 @@ use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use crate::bucket_interpreter::BucketInterpreter;
-use crate::bucket_interpreter::env::{ContextSwitcher, FunctionsLibrary, TemplatesLibrary};
+use crate::bucket_interpreter::env::{
+    ContextSwitcher, FunctionsLibrary, TemplatesLibrary, LibraryAccess,
+};
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 
@@ -49,9 +51,7 @@ impl PassMemory {
             println!("Running template {}", self.current_scope.borrow());
         }
         let interpreter = self.build_interpreter(observer);
-        let lib_t = self.templates_library.borrow();
-        let lib_f = self.functions_library.borrow();
-        let env = Env::new(&lib_t, &lib_f, self);
+        let env = Env::new(self, self);
         interpreter.execute_instructions(&template.body, env, true);
     }
 
@@ -151,3 +151,13 @@ impl ContextSwitcher for PassMemory {
         self.build_interpreter_with_scope(interpreter.observer, scope.to_string())
     }
 }
+
+impl LibraryAccess for PassMemory {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        Ref::map(self.functions_library.borrow(), |map| &map[name])
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        Ref::map(self.templates_library.borrow(), |map| &map[name])
+    }
+}
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index 906f8a974..68f73d5de 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -34,13 +34,15 @@ pub trait CircuitTransformationPass {
         self.pre_hook_circuit(&circuit);
         let templates = circuit.templates.iter().map(|t| self.transform_template(t)).collect();
         let field_tracking = self.get_updated_field_constants();
-        Circuit {
+        let mut new_circuit = Circuit {
             wasm_producer: circuit.wasm_producer.clone(),
             c_producer: circuit.c_producer.clone(),
             llvm_data: circuit.llvm_data.clone_with_new_field_tracking(field_tracking),
             templates,
             functions: circuit.functions.iter().map(|f| self.transform_function(f)).collect(),
-        }
+        };
+        self.post_hook_circuit(&mut new_circuit);
+        new_circuit
     }
 
     fn get_updated_field_constants(&self) -> Vec<String>;
@@ -368,6 +370,8 @@ pub trait CircuitTransformationPass {
         NopBucket { id: new_id() }.allocate()
     }
 
+    fn post_hook_circuit(&self, _cir: &mut Circuit) {}
+
     pre_hook!(pre_hook_circuit, Circuit);
     pre_hook!(pre_hook_template, TemplateCode);
     pre_hook!(pre_hook_function, FunctionCode);
diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index cc994aa08..3b43d154e 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -37,6 +37,7 @@ pub const FR_LOR_FN_NAME: &str = "fr_logic_or";
 pub const FR_LNOT_FN_NAME: &str = "fr_logic_not";
 pub const FR_ADDR_CAST_FN_NAME: &str = "fr_cast_to_addr";
 pub const FR_ARRAY_COPY_FN_NAME: &str = "fr_copy_n";
+pub const FR_IDENTITY_ARR_0_PTR: &str = "identity_arr_0_ptr";
 
 macro_rules! fr_unary_op_base {
     ($name: expr, $producer: expr, $argTy: expr, $retTy: expr) => {{
@@ -271,6 +272,22 @@ pub fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return_void(producer);
 }
 
+pub fn identity_arr0_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let val_type = bigint_type(producer).array_type(0).ptr_type(Default::default());
+    let func = create_function(
+        producer,
+        &None,
+        0,
+        "",
+        FR_IDENTITY_ARR_0_PTR,
+        val_type.fn_type(&[val_type.into()], false),
+    );
+    let main = create_bb(producer, func, FR_IDENTITY_ARR_0_PTR);
+    producer.set_current_bb(main);
+    // Just return the parameter
+    create_return(producer, func.get_nth_param(0).unwrap());
+}
+
 pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     add_fn(producer);
     sub_fn(producer);
@@ -296,5 +313,6 @@ pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     logic_not_fn(producer);
     addr_cast_fn(producer);
     array_copy_fn(producer);
+    identity_arr0_ptr_fn(producer);
     pow_fn(producer); //uses functions generated by mul_fn & lt_fn
 }
diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index 67ebbb2ce..2567bdbd0 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -5,9 +5,9 @@ use inkwell::debug_info::AsDIScope;
 use inkwell::types::FunctionType;
 use inkwell::values::{AnyValueEnum, ArrayValue, FunctionValue, IntValue, PointerValue};
 
-use crate::llvm_elements::{BodyCtx, LLVM, LLVMIRProducer};
+use crate::llvm_elements::{BodyCtx, LLVM, LLVMIRProducer, TemplateCtx};
 use crate::llvm_elements::instructions::create_gep;
-use crate::llvm_elements::template::TemplateCtx;
+use crate::llvm_elements::values::zero;
 
 pub fn create_function<'a>(
     producer: &dyn LLVMIRProducer<'a>,
@@ -50,10 +50,40 @@ pub fn create_bb<'a>(
     producer.context().append_basic_block(func, name)
 }
 
+struct FunctionCtx<'a> {
+    current_function: FunctionValue<'a>,
+    arena: PointerValue<'a>,
+}
+
+impl<'a> FunctionCtx<'a> {
+    fn new(current_function: FunctionValue<'a>) -> Self {
+        FunctionCtx {
+            current_function,
+            arena: current_function
+                .get_nth_param(0)
+                .expect("Function needs at least one argument for the arena!")
+                .into_pointer_value(),
+        }
+    }
+}
+
+impl<'a> BodyCtx<'a> for FunctionCtx<'a> {
+    fn get_variable(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a> {
+        create_gep(producer, self.arena, &[index])
+    }
+
+    fn get_variable_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
+        self.arena.into()
+    }
+}
+
 pub struct FunctionLLVMIRProducer<'ctx: 'prod, 'prod> {
     parent: &'prod dyn LLVMIRProducer<'ctx>,
-    function_ctx: FunctionCtx<'ctx>,
-    current_function: FunctionValue<'ctx>,
+    ctx: FunctionCtx<'ctx>,
 }
 
 impl<'ctx, 'prod> FunctionLLVMIRProducer<'ctx, 'prod> {
@@ -61,11 +91,7 @@ impl<'ctx, 'prod> FunctionLLVMIRProducer<'ctx, 'prod> {
         producer: &'prod dyn LLVMIRProducer<'ctx>,
         current_function: FunctionValue<'ctx>,
     ) -> Self {
-        FunctionLLVMIRProducer {
-            parent: producer,
-            function_ctx: FunctionCtx::new(current_function),
-            current_function,
-        }
+        FunctionLLVMIRProducer { parent: producer, ctx: FunctionCtx::new(current_function) }
     }
 }
 
@@ -82,16 +108,16 @@ impl<'ctx, 'prod> LLVMIRProducer<'ctx> for FunctionLLVMIRProducer<'ctx, 'prod> {
         self.parent.set_current_bb(bb)
     }
 
-    fn template_ctx(&self) -> &TemplateCtx<'ctx> {
+    fn template_ctx(&self) -> &dyn TemplateCtx<'ctx> {
         self.parent.template_ctx()
     }
 
     fn body_ctx(&self) -> &dyn BodyCtx<'ctx> {
-        &self.function_ctx
+        &self.ctx
     }
 
     fn current_function(&self) -> FunctionValue<'ctx> {
-        self.current_function
+        self.ctx.current_function
     }
 
     fn builder(&self) -> &Builder<'ctx> {
@@ -107,31 +133,148 @@ impl<'ctx, 'prod> LLVMIRProducer<'ctx> for FunctionLLVMIRProducer<'ctx, 'prod> {
     }
 }
 
-pub struct FunctionCtx<'a> {
-    arena: PointerValue<'a>,
+struct ExtractedFunctionCtx<'a> {
+    current_function: FunctionValue<'a>,
+    //TODO: will have at least the signals-arena [0 x i256]* and lvars [0 x i256]* from the template, and possibly the subcmps
+    signals: PointerValue<'a>,
+    lvars: PointerValue<'a>,
 }
 
-impl<'a> FunctionCtx<'a> {
-    pub fn new(current_function: FunctionValue<'a>) -> Self {
-        FunctionCtx {
-            arena: current_function
+impl<'a> ExtractedFunctionCtx<'a> {
+    fn new(current_function: FunctionValue<'a>) -> Self {
+        ExtractedFunctionCtx {
+            current_function,
+            signals: current_function
                 .get_nth_param(0)
-                .expect("Function needs at least one argument for the arena!")
+                .expect("Function must have at least 1 argument for signal array!")
+                .into_pointer_value(),
+            lvars: current_function
+                .get_nth_param(1)
+                .expect("Function must have at least 2 arguments for signal and lvar arrays!")
                 .into_pointer_value(),
         }
     }
 }
 
-impl<'a> BodyCtx<'a> for FunctionCtx<'a> {
+impl<'a> BodyCtx<'a> for ExtractedFunctionCtx<'a> {
     fn get_variable(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         index: IntValue<'a>,
     ) -> AnyValueEnum<'a> {
-        create_gep(producer, self.arena, &[index])
+        //'gep' must read through the pointer with 0 and then index the array
+        create_gep(producer, self.lvars, &[zero(producer), index])
     }
 
     fn get_variable_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
-        self.arena.into()
+        self.lvars.into()
+    }
+}
+
+impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
+    fn load_subcmp(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a> {
+        todo!();
+        //create_gep(producer, self.subcmps, &[zero(producer), id.into_int_value()]).into_pointer_value()
+    }
+
+    fn load_subcmp_addr(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a> {
+        todo!();
+        // let signals = create_gep(
+        //     producer,
+        //     self.subcmps,
+        //     &[zero(producer), id.into_int_value(), zero(producer)],
+        // )
+        // .into_pointer_value();
+        // create_load(producer, signals).into_pointer_value()
+    }
+
+    fn load_subcmp_counter(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a> {
+        todo!();
+        // create_gep(
+        //     producer,
+        //     self.subcmps,
+        //     &[zero(producer), id.into_int_value(), create_literal_u32(producer, 1)],
+        // )
+        // .into_pointer_value()
+    }
+
+    fn get_signal(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a> {
+        //'gep' must read through the pointer with 0 and then index the array
+        create_gep(producer, self.signals, &[zero(producer), index])
+    }
+
+    fn get_signal_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
+        self.signals.into()
+    }
+}
+
+pub struct ExtractedFunctionLLVMIRProducer<'ctx: 'prod, 'prod> {
+    parent: &'prod dyn LLVMIRProducer<'ctx>,
+    ctx: ExtractedFunctionCtx<'ctx>,
+}
+
+impl<'ctx, 'prod> ExtractedFunctionLLVMIRProducer<'ctx, 'prod> {
+    pub fn new(
+        producer: &'prod dyn LLVMIRProducer<'ctx>,
+        current_function: FunctionValue<'ctx>,
+    ) -> Self {
+        ExtractedFunctionLLVMIRProducer {
+            parent: producer,
+            ctx: ExtractedFunctionCtx::new(current_function),
+        }
+    }
+}
+
+impl<'ctx, 'prod> LLVMIRProducer<'ctx> for ExtractedFunctionLLVMIRProducer<'ctx, 'prod> {
+    fn llvm(&self) -> &LLVM<'ctx> {
+        self.parent.llvm()
+    }
+
+    fn context(&self) -> ContextRef<'ctx> {
+        self.parent.context()
+    }
+
+    fn set_current_bb(&self, bb: BasicBlock<'ctx>) {
+        self.parent.set_current_bb(bb)
+    }
+
+    fn template_ctx(&self) -> &dyn TemplateCtx<'ctx> {
+        &self.ctx
+    }
+
+    fn body_ctx(&self) -> &dyn BodyCtx<'ctx> {
+        &self.ctx
+    }
+
+    fn current_function(&self) -> FunctionValue<'ctx> {
+        self.ctx.current_function
+    }
+
+    fn builder(&self) -> &Builder<'ctx> {
+        self.parent.builder()
+    }
+
+    fn constant_fields(&self) -> &Vec<String> {
+        self.parent.constant_fields()
+    }
+
+    fn get_template_mem_arg(&self, _run_fn: FunctionValue<'ctx>) -> ArrayValue<'ctx> {
+        panic!("The function llvm producer can't extract the template argument of a run function!");
     }
 }
diff --git a/code_producers/src/llvm_elements/mod.rs b/code_producers/src/llvm_elements/mod.rs
index f78aa6cb1..30af4796b 100644
--- a/code_producers/src/llvm_elements/mod.rs
+++ b/code_producers/src/llvm_elements/mod.rs
@@ -12,7 +12,7 @@ use inkwell::context::{Context, ContextRef};
 use inkwell::debug_info::{DebugInfoBuilder, DICompileUnit};
 use inkwell::module::Module;
 use inkwell::types::{AnyTypeEnum, BasicType, BasicTypeEnum, IntType};
-use inkwell::values::{ArrayValue, BasicMetadataValueEnum, BasicValueEnum, IntValue};
+use inkwell::values::{ArrayValue, BasicMetadataValueEnum, BasicValueEnum, IntValue, PointerValue};
 pub use inkwell::types::AnyType;
 pub use inkwell::values::{AnyValue, AnyValueEnum, FunctionValue, InstructionOpcode};
 pub use inkwell::debug_info::AsDIScope;
@@ -22,7 +22,6 @@ use program_structure::program_archive::ProgramArchive;
 use crate::components::TemplateInstanceIOMap;
 use crate::llvm_elements::types::bool_type;
 use crate::llvm_elements::instructions::create_alloca;
-use crate::llvm_elements::template::TemplateCtx;
 
 pub mod stdlib;
 pub mod template;
@@ -46,11 +45,44 @@ pub trait BodyCtx<'a> {
     fn get_variable_array(&self, producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a>;
 }
 
+pub trait TemplateCtx<'a> {
+    /// Returns the memory address of the subcomponent
+    fn load_subcmp(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a>;
+
+    /// Creates the necessary code to load a subcomponent given the expression used as id
+    fn load_subcmp_addr(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a>;
+
+    /// Creates the necessary code to load a subcomponent counter given the expression used as id
+    fn load_subcmp_counter(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a>;
+
+    /// Returns a pointer to the signal associated to the index
+    fn get_signal(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a>;
+
+    /// Returns a pointer to the signal array
+    fn get_signal_array(&self, producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a>;
+}
+
 pub trait LLVMIRProducer<'a> {
     fn llvm(&self) -> &LLVM<'a>;
     fn context(&self) -> ContextRef<'a>;
     fn set_current_bb(&self, bb: BasicBlock<'a>);
-    fn template_ctx(&self) -> &TemplateCtx<'a>;
+    fn template_ctx(&self) -> &dyn TemplateCtx<'a>;
     fn body_ctx(&self) -> &dyn BodyCtx<'a>;
     fn current_function(&self) -> FunctionValue<'a>;
     fn builder(&self) -> &Builder<'a>;
@@ -100,7 +132,7 @@ impl<'a> LLVMIRProducer<'a> for TopLevelLLVMIRProducer<'a> {
         self.llvm().builder.position_at_end(bb);
     }
 
-    fn template_ctx(&self) -> &TemplateCtx<'a> {
+    fn template_ctx(&self) -> &dyn TemplateCtx<'a> {
         panic!("The top level llvm producer does not hold a template context!");
     }
 
diff --git a/code_producers/src/llvm_elements/stdlib.rs b/code_producers/src/llvm_elements/stdlib.rs
index b6c347777..27546f463 100644
--- a/code_producers/src/llvm_elements/stdlib.rs
+++ b/code_producers/src/llvm_elements/stdlib.rs
@@ -2,6 +2,8 @@
 
 use crate::llvm_elements::LLVMIRProducer;
 
+//NOTE: LLVM identifiers can contain "." but circom identifiers cannot, which makes checking for this prefix unambiguous.
+pub const GENERATED_FN_PREFIX: &str = "..generated..";
 pub const CONSTRAINT_VALUES_FN_NAME: &str = "__constraint_values";
 pub const CONSTRAINT_VALUE_FN_NAME: &str = "__constraint_value";
 pub const ASSERT_FN_NAME: &str = "__assert";
diff --git a/code_producers/src/llvm_elements/template.rs b/code_producers/src/llvm_elements/template.rs
index 3152d6493..3d3090c0f 100644
--- a/code_producers/src/llvm_elements/template.rs
+++ b/code_producers/src/llvm_elements/template.rs
@@ -1,90 +1,27 @@
+use std::default::Default;
 use inkwell::basic_block::BasicBlock;
 use inkwell::builder::Builder;
 use inkwell::context::ContextRef;
 use inkwell::types::{AnyType, BasicType, PointerType};
 use inkwell::values::{AnyValueEnum, ArrayValue, FunctionValue, IntValue, PointerValue};
-
-use crate::llvm_elements::{BodyCtx, LLVM, LLVMIRProducer};
+use crate::llvm_elements::{BodyCtx, LLVM, LLVMIRProducer, TemplateCtx};
 use crate::llvm_elements::instructions::{create_alloca, create_gep, create_load};
 use crate::llvm_elements::types::{bigint_type, i32_type};
 use crate::llvm_elements::values::{create_literal_u32, zero};
-use std::default::Default;
 
-pub struct TemplateLLVMIRProducer<'ctx: 'prod, 'prod> {
-    parent: &'prod dyn LLVMIRProducer<'ctx>,
-    template_ctx: TemplateCtx<'ctx>,
-}
-
-impl<'a, 'b> LLVMIRProducer<'a> for TemplateLLVMIRProducer<'a, 'b> {
-    fn llvm(&self) -> &LLVM<'a> {
-        self.parent.llvm()
-    }
-
-    fn context(&self) -> ContextRef<'a> {
-        self.parent.context()
-    }
-
-    fn set_current_bb(&self, bb: BasicBlock<'a>) {
-        self.parent.set_current_bb(bb)
-    }
-
-    fn template_ctx(&self) -> &TemplateCtx<'a> {
-        &self.template_ctx
-    }
-
-    fn body_ctx(&self) -> &dyn BodyCtx<'a> {
-        &self.template_ctx
-    }
-
-    fn current_function(&self) -> FunctionValue<'a> {
-        self.template_ctx.current_function
-    }
-
-    fn builder(&self) -> &Builder<'a> {
-        self.parent.builder()
-    }
-
-    fn constant_fields(&self) -> &Vec<String> {
-        self.parent.constant_fields()
-    }
-
-    fn get_template_mem_arg(&self, run_fn: FunctionValue<'a>) -> ArrayValue<'a> {
-        run_fn
-            .get_nth_param(self.template_ctx.signals_arg_offset as u32)
-            .unwrap()
-            .into_array_value()
-    }
-}
-
-impl<'a, 'b> TemplateLLVMIRProducer<'a, 'b> {
-    pub fn new(
-        parent: &'b dyn LLVMIRProducer<'a>,
-        stack_depth: usize,
-        number_subcmps: usize,
-        current_function: FunctionValue<'a>,
-        template_type: PointerType<'a>,
-        signals_arg_offset: usize,
-    ) -> Self {
-        TemplateLLVMIRProducer {
-            parent,
-            template_ctx: TemplateCtx::new(
-                parent,
-                stack_depth,
-                number_subcmps,
-                current_function,
-                template_type,
-                signals_arg_offset,
-            ),
-        }
-    }
+pub fn create_template_struct<'a>(
+    producer: &dyn LLVMIRProducer<'a>,
+    n_signals: usize,
+) -> PointerType<'a> {
+    bigint_type(producer).array_type(n_signals as u32).ptr_type(Default::default())
 }
 
-pub struct TemplateCtx<'a> {
-    pub stack: PointerValue<'a>,
+struct StdTemplateCtx<'a> {
+    stack: PointerValue<'a>,
     subcmps: PointerValue<'a>,
-    pub current_function: FunctionValue<'a>,
-    pub template_type: PointerType<'a>,
-    pub signals_arg_offset: usize,
+    current_function: FunctionValue<'a>,
+    template_type: PointerType<'a>,
+    signals_arg_offset: usize,
 }
 
 #[inline]
@@ -106,8 +43,8 @@ fn setup_stack<'a>(producer: &dyn LLVMIRProducer<'a>, stack_depth: usize) -> Poi
         .into_pointer_value()
 }
 
-impl<'a> TemplateCtx<'a> {
-    pub fn new(
+impl<'a> StdTemplateCtx<'a> {
+    fn new(
         producer: &dyn LLVMIRProducer<'a>,
         stack_depth: usize,
         number_subcmps: usize,
@@ -115,7 +52,7 @@ impl<'a> TemplateCtx<'a> {
         template_type: PointerType<'a>,
         signals_arg_offset: usize,
     ) -> Self {
-        TemplateCtx {
+        StdTemplateCtx {
             stack: setup_stack(producer, stack_depth),
             subcmps: setup_subcmps(producer, number_subcmps),
             current_function,
@@ -123,9 +60,10 @@ impl<'a> TemplateCtx<'a> {
             signals_arg_offset,
         }
     }
+}
 
-    /// Returns the memory address of the subcomponent
-    pub fn load_subcmp(
+impl<'a> TemplateCtx<'a> for StdTemplateCtx<'a> {
+    fn load_subcmp(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
@@ -134,8 +72,7 @@ impl<'a> TemplateCtx<'a> {
             .into_pointer_value()
     }
 
-    /// Creates the necessary code to load a subcomponent given the expression used as id
-    pub fn load_subcmp_addr(
+    fn load_subcmp_addr(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
@@ -149,8 +86,7 @@ impl<'a> TemplateCtx<'a> {
         create_load(producer, signals).into_pointer_value()
     }
 
-    /// Creates the necessary code to load a subcomponent counter given the expression used as id
-    pub fn load_subcmp_counter(
+    fn load_subcmp_counter(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
@@ -163,8 +99,7 @@ impl<'a> TemplateCtx<'a> {
         .into_pointer_value()
     }
 
-    /// Returns a pointer to the signal associated to the index
-    pub fn get_signal(
+    fn get_signal(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         index: IntValue<'a>,
@@ -173,14 +108,13 @@ impl<'a> TemplateCtx<'a> {
         create_gep(producer, signals.into_pointer_value(), &[zero(producer), index])
     }
 
-    /// Returns a pointer to the signal array
-    pub fn get_signal_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
+    fn get_signal_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
         let signals = self.current_function.get_nth_param(self.signals_arg_offset as u32).unwrap();
         signals.into_pointer_value().into()
     }
 }
 
-impl<'a> BodyCtx<'a> for TemplateCtx<'a> {
+impl<'a> BodyCtx<'a> for StdTemplateCtx<'a> {
     /// Returns a reference to the local variable associated to the index
     fn get_variable(
         &self,
@@ -195,9 +129,68 @@ impl<'a> BodyCtx<'a> for TemplateCtx<'a> {
     }
 }
 
-pub fn create_template_struct<'a>(
-    producer: &dyn LLVMIRProducer<'a>,
-    n_signals: usize,
-) -> PointerType<'a> {
-    bigint_type(producer).array_type(n_signals as u32).ptr_type(Default::default())
+pub struct TemplateLLVMIRProducer<'ctx: 'prod, 'prod> {
+    parent: &'prod dyn LLVMIRProducer<'ctx>,
+    ctx: StdTemplateCtx<'ctx>,
+}
+
+impl<'a, 'b> LLVMIRProducer<'a> for TemplateLLVMIRProducer<'a, 'b> {
+    fn llvm(&self) -> &LLVM<'a> {
+        self.parent.llvm()
+    }
+
+    fn context(&self) -> ContextRef<'a> {
+        self.parent.context()
+    }
+
+    fn set_current_bb(&self, bb: BasicBlock<'a>) {
+        self.parent.set_current_bb(bb)
+    }
+
+    fn template_ctx(&self) -> &dyn TemplateCtx<'a> {
+        &self.ctx
+    }
+
+    fn body_ctx(&self) -> &dyn BodyCtx<'a> {
+        &self.ctx
+    }
+
+    fn current_function(&self) -> FunctionValue<'a> {
+        self.ctx.current_function
+    }
+
+    fn builder(&self) -> &Builder<'a> {
+        self.parent.builder()
+    }
+
+    fn constant_fields(&self) -> &Vec<String> {
+        self.parent.constant_fields()
+    }
+
+    fn get_template_mem_arg(&self, run_fn: FunctionValue<'a>) -> ArrayValue<'a> {
+        run_fn.get_nth_param(self.ctx.signals_arg_offset as u32).unwrap().into_array_value()
+    }
+}
+
+impl<'a, 'b> TemplateLLVMIRProducer<'a, 'b> {
+    pub fn new(
+        parent: &'b dyn LLVMIRProducer<'a>,
+        stack_depth: usize,
+        number_subcmps: usize,
+        current_function: FunctionValue<'a>,
+        template_type: PointerType<'a>,
+        signals_arg_offset: usize,
+    ) -> Self {
+        TemplateLLVMIRProducer {
+            parent,
+            ctx: StdTemplateCtx::new(
+                parent,
+                stack_depth,
+                number_subcmps,
+                current_function,
+                template_type,
+                signals_arg_offset,
+            ),
+        }
+    }
 }
diff --git a/compiler/src/circuit_design/circuit.rs b/compiler/src/circuit_design/circuit.rs
index 58dc940e3..10a5d1717 100644
--- a/compiler/src/circuit_design/circuit.rs
+++ b/compiler/src/circuit_design/circuit.rs
@@ -7,13 +7,15 @@ use crate::hir::very_concrete_program::VCP;
 use crate::intermediate_representation::ir_interface::ObtainMeta;
 use crate::translating_traits::*;
 use code_producers::c_elements::*;
-use code_producers::llvm_elements::array_switch::load_array_switch;
-use code_producers::wasm_elements::*;
 use code_producers::llvm_elements::*;
+use code_producers::llvm_elements::array_switch::load_array_switch;
 use code_producers::llvm_elements::fr::load_fr;
-use code_producers::llvm_elements::functions::{create_function, FunctionLLVMIRProducer};
-use code_producers::llvm_elements::stdlib::load_stdlib;
+use code_producers::llvm_elements::functions::{
+    create_function, FunctionLLVMIRProducer, ExtractedFunctionLLVMIRProducer,
+};
+use code_producers::llvm_elements::stdlib::{load_stdlib, GENERATED_FN_PREFIX};
 use code_producers::llvm_elements::types::{bigint_type, void_type};
+use code_producers::wasm_elements::*;
 use program_structure::program_archive::ProgramArchive;
 
 pub struct CompilationFlags {
@@ -70,7 +72,31 @@ impl WriteLLVMIR for Circuit {
         let mut funcs = HashMap::new();
         for f in &self.functions {
             let name = f.header.as_str();
-            let arena_ty = bigint_type(producer).ptr_type(Default::default());
+            let param_types = if name.starts_with(GENERATED_FN_PREFIX) {
+                // Use the FunctionCodeInfo instance to generate the vector of parameter types.
+                let mut types = vec![];
+                for p in &f.params {
+                    // This section is a little more complicated than desired because IntType and ArrayType do
+                    //  not have a common Trait that defines the `array_type` and `ptr_type` member functions.
+                    let ty = match &p.length.len() {
+                        0 => bigint_type(producer).ptr_type(Default::default()),
+                        1 => bigint_type(producer)
+                            .array_type(p.length[0] as u32)
+                            .ptr_type(Default::default()),
+                        _ => {
+                            let mut temp = bigint_type(producer).array_type(p.length[0] as u32);
+                            for size in &p.length[1..] {
+                                temp = temp.array_type(*size as u32);
+                            }
+                            temp.ptr_type(Default::default())
+                        }
+                    };
+                    types.push(ty.into());
+                }
+                types
+            } else {
+                vec![bigint_type(producer).ptr_type(Default::default()).into()]
+            };
             let function = create_function(
                 producer,
                 f.get_source_file_id(),
@@ -79,9 +105,9 @@ impl WriteLLVMIR for Circuit {
                 name,
                 if f.returns.is_empty() || (f.returns.len() == 1 && *f.returns.get(0).unwrap() == 1)
                 {
-                    bigint_type(producer).fn_type(&[arena_ty.into()], false)
+                    bigint_type(producer).fn_type(&param_types, false)
                 } else {
-                    void_type(producer).fn_type(&[arena_ty.into()], false)
+                    void_type(producer).fn_type(&param_types, false)
                 },
             );
             funcs.insert(name, function);
@@ -89,9 +115,13 @@ impl WriteLLVMIR for Circuit {
 
         // Code for the functions
         for f in &self.functions {
-            let function_producer = FunctionLLVMIRProducer::new(producer, funcs[f.header.as_str()]);
-            Self::manage_debug_loc_from_curr(&function_producer, f.as_ref());
-            f.produce_llvm_ir(&function_producer);
+            let x: Box<dyn LLVMIRProducer<'_>> = if f.header.starts_with(GENERATED_FN_PREFIX) {
+                Box::new(ExtractedFunctionLLVMIRProducer::new(producer, funcs[f.header.as_str()]))
+            } else {
+                Box::new(FunctionLLVMIRProducer::new(producer, funcs[f.header.as_str()]))
+            };
+            Self::manage_debug_loc_from_curr(x.as_ref(), f.as_ref());
+            f.produce_llvm_ir(x.as_ref());
         }
 
         // Code for the templates
diff --git a/compiler/src/intermediate_representation/call_bucket.rs b/compiler/src/intermediate_representation/call_bucket.rs
index cf5bb8e10..04a79813d 100644
--- a/compiler/src/intermediate_representation/call_bucket.rs
+++ b/compiler/src/intermediate_representation/call_bucket.rs
@@ -107,10 +107,24 @@ impl WriteLLVMIR for CallBucket {
     ) -> Option<LLVMInstruction<'a>> {
         Self::manage_debug_loc_from_curr(producer, self);
 
-        // Create array with arena_size size
-        let bigint_arr = bigint_type(producer).array_type(self.arena_size as u32);
-        let arena =
-            create_alloca(producer, bigint_arr.into(), format!("{}_arena", self.symbol).as_str());
+        // Check arena_size==0 which indicates arguments should not be placed into arena
+        let arena_size = self.arena_size;
+        if arena_size == 0 {
+            let mut args = vec![];
+            for arg in self.arguments.iter() {
+                args.push(to_basic_metadata_enum(
+                    arg.produce_llvm_ir(producer).expect("Call arguments must produce a value!"),
+                ));
+            }
+            let call_ret_val = create_call(producer, self.symbol.as_str(), &args);
+            return Some(call_ret_val);
+        } else {
+            // Create array with arena_size size
+            let arena = create_alloca(
+                producer,
+                bigint_type(producer).array_type(arena_size as u32).into(),
+                format!("{}_arena", self.symbol).as_str(),
+            );
 
         // Get the offsets based on the sizes of the arguments
         let offsets: Vec<usize> = self.argument_types.iter().scan(0, |state, arg_ty| {
@@ -153,47 +167,48 @@ impl WriteLLVMIR for CallBucket {
             }
         }
 
-        let arena = pointer_cast(
-            producer,
-            arena.into_pointer_value(),
-            bigint_type(producer).ptr_type(Default::default()),
-        );
+            let arena = pointer_cast(
+                producer,
+                arena.into_pointer_value(),
+                bigint_type(producer).ptr_type(Default::default()),
+            );
 
-        // Call function passing the array as argument
-        let call_ret_val = create_call(
-            producer,
-            self.symbol.as_str(),
-            &[to_basic_metadata_enum(arena.into())],
-        );
+            // Call function passing the array as argument
+            let call_ret_val = create_call(
+                producer,
+                self.symbol.as_str(),
+                &[to_basic_metadata_enum(arena.into())],
+            );
 
-        match &self.return_info {
-            ReturnType::Intermediate { op_aux_no } => {
-                todo!("ReturnType::Intermediate {:#?}", op_aux_no);
-            }
-            ReturnType::Final(data) => {
-                let size = data.context.size;
-                let source_of_store = if size == 1 {
-                    //For scalar returns, store the returned value to
-                    //  the proper index in the current function's arena.
-                    call_ret_val
-                } else {
-                    //For array returns, copy the data from the callee arena to the caller arena.
-                    create_gep(
+            match &self.return_info {
+                ReturnType::Intermediate { op_aux_no } => {
+                    todo!("ReturnType::Intermediate {:#?}", op_aux_no);
+                }
+                ReturnType::Final(data) => {
+                    let size = data.context.size;
+                    let source_of_store = if size == 1 {
+                        //For scalar returns, store the returned value to
+                        //  the proper index in the current function's arena.
+                        call_ret_val
+                    } else {
+                        //For array returns, copy the data from the callee arena to the caller arena.
+                        create_gep(
+                            producer,
+                            arena,
+                            &[i32_type(producer).const_int(self.arguments.len() as u64, false)],
+                        )
+                    };
+                    return StoreBucket::produce_llvm_ir(
                         producer,
-                        arena,
-                        &[i32_type(producer).const_int(self.arguments.len() as u64, false)],
-                    )
-                };
-                return StoreBucket::produce_llvm_ir(
-                    producer,
-                    Either::Left(source_of_store),
-                    &data.dest,
-                    &data.dest_address_type,
-                    InstrContext { size },
-                    &None,
-                );
-            }
-        };
+                        Either::Left(source_of_store),
+                        &data.dest,
+                        &data.dest_address_type,
+                        InstrContext { size },
+                        &None,
+                    );
+                }
+            };
+        }
     }
 }
 

From a4cec758c2d33bdeece09fa581e01be8106b9ad6 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 23 Aug 2023 14:27:26 -0500
Subject: [PATCH 04/42] code cleanup

---
 compiler/src/circuit_design/function.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/src/circuit_design/function.rs b/compiler/src/circuit_design/function.rs
index 997079b54..cba86907e 100644
--- a/compiler/src/circuit_design/function.rs
+++ b/compiler/src/circuit_design/function.rs
@@ -5,7 +5,7 @@ use crate::intermediate_representation::ir_interface::ObtainMeta;
 use crate::translating_traits::*;
 use code_producers::c_elements::*;
 use code_producers::llvm_elements::{LLVMInstruction, LLVMIRProducer};
-use code_producers::llvm_elements::functions::{create_bb};
+use code_producers::llvm_elements::functions::create_bb;
 use code_producers::llvm_elements::instructions::create_br;
 
 use code_producers::wasm_elements::*;

From da4d12b4934967c078be212c7f2522d24a6046c5 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 23 Aug 2023 15:29:42 -0500
Subject: [PATCH 05/42] documentation

---
 circuit_passes/src/passes/loop_unroll.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
index 19d9eb3dc..5d9f179f0 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll.rs
@@ -119,6 +119,11 @@ impl LoopUnrollPass {
                 let name = self.extract_body(bucket);
                 for _ in 0..loop_count {
                     block_body.push(
+                        // NOTE: CallBucket arguments must use a LoadBucket to reference the necessary pointers
+                        //  within the current body. However, no actual load instruction needs to be generated
+                        //  to use these pointers as parameters to the function, so we must use the
+                        //  `bounded_fn` field of the LoadBucket to specify the identity function to perform
+                        //  the "loading" (in reality, it just returns the pointer that was passed in).
                         CallBucket {
                             id: new_id(),
                             source_file_id: bucket.source_file_id,

From c2a188f1de94b7a54e5ddae0d8f0e4b23a419cb1 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 24 Aug 2023 16:02:03 -0500
Subject: [PATCH 06/42] More cleanup from the PassMemory changes

---
 circuit_passes/src/bucket_interpreter/env.rs |  5 +----
 circuit_passes/src/passes/memory.rs          | 12 ++++++------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/env.rs b/circuit_passes/src/bucket_interpreter/env.rs
index 81ae2a752..78f615600 100644
--- a/circuit_passes/src/bucket_interpreter/env.rs
+++ b/circuit_passes/src/bucket_interpreter/env.rs
@@ -6,9 +6,6 @@ use compiler::circuit_design::template::TemplateCode;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::{JoinSemiLattice, Value};
 
-pub type TemplatesLibrary = HashMap<String, TemplateCode>;
-pub type FunctionsLibrary = HashMap<String, FunctionCode>;
-
 pub trait ContextSwitcher {
     fn switch<'a>(
         &'a self,
@@ -247,7 +244,7 @@ impl<'a> Env<'a> {
         }
         let interpreter = self.context_switcher.switch(interpreter, name);
         let r = interpreter.execute_instructions(
-            &code,
+            code,
             function_env,
             !interpreter.observer.ignore_function_calls() && observe,
         );
diff --git a/circuit_passes/src/passes/memory.rs b/circuit_passes/src/passes/memory.rs
index eeedb73e1..25d96d763 100644
--- a/circuit_passes/src/passes/memory.rs
+++ b/circuit_passes/src/passes/memory.rs
@@ -7,16 +7,16 @@ use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use crate::bucket_interpreter::BucketInterpreter;
-use crate::bucket_interpreter::env::{
-    ContextSwitcher, FunctionsLibrary, TemplatesLibrary, LibraryAccess,
-};
+use crate::bucket_interpreter::env::{ContextSwitcher, LibraryAccess};
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 
 pub struct PassMemory {
-    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    templates_library: RefCell<TemplatesLibrary>,
-    functions_library: RefCell<FunctionsLibrary>,
+    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need
+    //  mutability. In some cases, very fine-grained mutability is needed, which is why everything here
+    //  is wrapped separately and the template/function library values themselves are wrapped separately.
+    templates_library: RefCell<HashMap<String, TemplateCode>>,
+    functions_library: RefCell<HashMap<String, FunctionCode>>,
     constant_fields: RefCell<Vec<String>>,
     current_scope: RefCell<String>,
     io_map: RefCell<TemplateInstanceIOMap>,

From c83d843088085ddfda452630445ac753e504678b Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 6 Sep 2023 10:06:09 -0500
Subject: [PATCH 07/42] complete extraction for simple cases

---
 circuit_passes/src/bucket_interpreter/env.rs  |   4 +
 circuit_passes/src/bucket_interpreter/mod.rs  |  22 +-
 .../src/bucket_interpreter/value.rs           |  16 +-
 circuit_passes/src/passes/loop_unroll.rs      | 763 +++++++++++++++---
 circuit_passes/src/passes/memory.rs           |   2 +-
 code_producers/src/llvm_elements/fr.rs        | 112 ++-
 code_producers/src/llvm_elements/functions.rs |  66 +-
 .../src/llvm_elements/instructions.rs         |  11 +-
 compiler/src/circuit_design/circuit.rs        |  38 +-
 .../store_bucket.rs                           |  10 +-
 10 files changed, 841 insertions(+), 203 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/env.rs b/circuit_passes/src/bucket_interpreter/env.rs
index 78f615600..08de8cb02 100644
--- a/circuit_passes/src/bucket_interpreter/env.rs
+++ b/circuit_passes/src/bucket_interpreter/env.rs
@@ -154,6 +154,10 @@ impl<'a> Env<'a> {
         self.subcmps.get(&subcmp_idx).unwrap().counter_equal_to(value)
     }
 
+    pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
+        self.vars.clone()
+    }
+
     // WRITE OPERATIONS
     pub fn set_var(self, idx: usize, value: Value) -> Self {
         let mut copy = self;
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index c463f62e9..c1c1b5ba5 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -45,12 +45,12 @@ impl<'a> BucketInterpreter<'a> {
         BucketInterpreter {
             observer,
             mem,
-            scope: scope.clone(),
+            scope,
             p: UsefulConstants::new(mem.get_prime()).get_p().clone(),
         }
     }
 
-    fn get_id_from_indexed_location(&self, location: &LocationRule, env: &Env) -> usize {
+    pub fn get_index_from_location(&self, location: &LocationRule, env: &Env) -> usize {
         match location {
             LocationRule::Indexed { location, .. } => {
                 let (idx, _) = self.execute_instruction(location, env.clone(), false);
@@ -70,19 +70,19 @@ impl<'a> BucketInterpreter<'a> {
     ) {
         match bucket.dest_address_type {
             AddressType::Variable => {
-                let idx = self.get_id_from_indexed_location(&bucket.dest, env);
+                let idx = self.get_index_from_location(&bucket.dest, env);
                 for index in self.mem.get_variables_index_mapping(&self.scope, &idx) {
                     vars.push(index);
                 }
             }
             AddressType::Signal => {
-                let idx = self.get_id_from_indexed_location(&bucket.dest, env);
+                let idx = self.get_index_from_location(&bucket.dest, env);
                 for index in self.mem.get_signal_index_mapping(&self.scope, &idx) {
                     signals.push(index);
                 }
             }
             AddressType::SubcmpSignal { .. } => {
-                let idx = self.get_id_from_indexed_location(&bucket.dest, env);
+                let idx = self.get_index_from_location(&bucket.dest, env);
                 for index in self.mem.get_component_addr_index_mapping(&self.scope, &idx) {
                     subcmps.push(index);
                 }
@@ -556,16 +556,16 @@ impl<'a> BucketInterpreter<'a> {
         return match cond_bool_result {
             None => (None, None, env),
             Some(true) => {
-                if cfg!(debug_assertions) {
-                    println!("Running then branch");
-                }
+                // if cfg!(debug_assertions) {
+                //     println!("Running then branch");
+                // }
                 let (ret, env) = self.execute_instructions(&true_branch, env, observe);
                 (ret, Some(true), env)
             }
             Some(false) => {
-                if cfg!(debug_assertions) {
-                    println!("Running else branch");
-                }
+                // if cfg!(debug_assertions) {
+                //     println!("Running else branch");
+                // }
                 let (ret, env) = self.execute_instructions(&false_branch, env, observe);
                 (ret, Some(false), env)
             }
diff --git a/circuit_passes/src/bucket_interpreter/value.rs b/circuit_passes/src/bucket_interpreter/value.rs
index 6da450698..3ba9df343 100644
--- a/circuit_passes/src/bucket_interpreter/value.rs
+++ b/circuit_passes/src/bucket_interpreter/value.rs
@@ -1,4 +1,4 @@
-use std::fmt::{Display, Formatter};
+use std::fmt::{Debug, Display, Formatter};
 use compiler::intermediate_representation::ir_interface::{ValueBucket, ValueType};
 use compiler::num_bigint::BigInt;
 use compiler::num_traits::ToPrimitive;
@@ -14,7 +14,7 @@ pub trait JoinSemiLattice {
 /// Poor man's lattice that gives up the moment values are not equal
 /// It's a join semi lattice with a top (Unknown)
 /// Not a complete lattice because there is no bottom
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Eq, PartialEq)]
 pub enum Value {
     Unknown,
     KnownU32(usize),
@@ -22,6 +22,16 @@ pub enum Value {
 }
 
 impl Display for Value {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Unknown => write!(f, "Unknown"),
+            KnownU32(n) => write!(f, "{}", n),
+            KnownBigInt(n) => write!(f, "{}", n),
+        }
+    }
+}
+
+impl Debug for Value {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
             Unknown => write!(f, "Unknown"),
@@ -54,7 +64,7 @@ impl Value {
     pub fn get_bigint_as_string(&self) -> String {
         match self {
             KnownBigInt(b) => b.to_string(),
-            _ => panic!("Can't extract a string representation of a non big int"),
+            _ => panic!("Value is not a KnownBigInt! {:?}", self),
         }
     }
 
diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
index 5d9f179f0..a21a4e11b 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll.rs
@@ -1,26 +1,233 @@
 use std::cell::RefCell;
-use std::collections::BTreeMap;
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::fmt::{Debug, Formatter};
 use std::vec;
 use code_producers::llvm_elements::stdlib::GENERATED_FN_PREFIX;
-use code_producers::llvm_elements::fr::FR_IDENTITY_ARR_0_PTR;
+use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
 use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use compiler::hir::very_concrete_program::Param;
 use compiler::intermediate_representation::{
-    BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
+    BucketId, InstructionList, InstructionPointer, new_id, UpdateId, ToSExp,
 };
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::value::Value;
 use crate::passes::CircuitTransformationPass;
 use crate::passes::memory::PassMemory;
 
+struct VariableValues<'a> {
+    pub env_at_header: Env<'a>,
+    pub loadstore_to_index: HashMap<BucketId, (AddressType, Value)>, // key is load/store bucket ID
+}
+
+impl<'a> VariableValues<'a> {
+    pub fn new(env_at_header: Env<'a>) -> Self {
+        VariableValues { env_at_header, loadstore_to_index: Default::default() }
+    }
+}
+
+impl Debug for VariableValues<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // write!(
+        //     f,
+        //     "\n{{\n env_at_header = {}\n loadstore_to_index = {:?}\n}}",
+        //     self.env_at_header, self.loadstore_to_index
+        // )
+        write!(f, "\n  loadstore_to_index = {:?}\n", self.loadstore_to_index)
+    }
+}
+
+struct EnvRecorder<'a> {
+    mem: &'a PassMemory,
+    // NOTE: RefCell is needed here because the instance of this struct is borrowed by
+    //  the main interpreter while we also need to mutate these internal structures.
+    vals_per_iteration: RefCell<HashMap<usize, VariableValues<'a>>>, // key is iteration number
+    current_iter_num: RefCell<usize>,
+    safe_to_move: RefCell<bool>,
+}
+
+impl Debug for EnvRecorder<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "\n current_iter_num = {}\n safe_to_move = {:?}\n vals_per_iteration = {:?}",
+            self.current_iter_num.borrow(),
+            self.safe_to_move.borrow(),
+            self.vals_per_iteration.borrow(),
+        )
+    }
+}
+
+impl<'a> EnvRecorder<'a> {
+    pub fn new(mem: &'a PassMemory) -> Self {
+        EnvRecorder {
+            mem,
+            vals_per_iteration: Default::default(),
+            current_iter_num: RefCell::new(0),
+            safe_to_move: RefCell::new(true),
+        }
+    }
+
+    pub fn is_safe_to_move(&self) -> bool {
+        *self.safe_to_move.borrow()
+    }
+
+    pub fn increment_iter(&self) {
+        *self.current_iter_num.borrow_mut() += 1;
+    }
+
+    pub fn get_iter(&self) -> usize {
+        *self.current_iter_num.borrow()
+    }
+
+    pub fn record_env_at_header(&self, env: Env<'a>) {
+        let iter = self.get_iter();
+        assert!(!self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration.borrow_mut().insert(iter, VariableValues::new(env));
+    }
+
+    pub fn get_header_env_clone(&self) -> Env {
+        let iter = self.get_iter();
+        assert!(self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration.borrow().get(&iter).unwrap().env_at_header.clone()
+    }
+
+    fn record_memloc_at_bucket(&self, bucket_id: &BucketId, addr_ty: AddressType, val: Value) {
+        let iter = self.get_iter();
+        assert!(self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration
+            .borrow_mut()
+            .get_mut(&iter)
+            .unwrap()
+            .loadstore_to_index
+            .insert(*bucket_id, (addr_ty, val));
+    }
+
+    fn compute_index(&self, loc: &LocationRule, env: &Env) -> Value {
+        match loc {
+            LocationRule::Mapped { .. } => {
+                todo!(); //not sure if/how to handle that
+            }
+            LocationRule::Indexed { location, .. } => {
+                // Evaluate the index using the current environment and using the environment from the
+                //  loop header. If either is Unknown or they do not give the same value, then it is
+                //  not safe to move the loop body to another function because the index computation may
+                //  not give the same result when done at the call site, outside of the new function.
+                let interp = self.mem.build_interpreter(self);
+                let (idx_loc, _) = interp.execute_instruction(location, env.clone(), false);
+                // println!("--   LOC: var/sig[{:?}]", idx_loc); //TODO: TEMP
+                if let Some(idx_loc) = idx_loc {
+                    let (idx_header, _) =
+                        interp.execute_instruction(location, self.get_header_env_clone(), false);
+                    if let Some(idx_header) = idx_header {
+                        if Value::eq(&idx_header, &idx_loc) {
+                            return idx_loc;
+                        }
+                    }
+                }
+                Value::Unknown
+            }
+        }
+    }
+
+    fn check(&self, bucket_id: &BucketId, addr_ty: &AddressType, loc: &LocationRule, env: &Env) {
+        let val_result = self.compute_index(loc, env);
+        if val_result == Value::Unknown {
+            self.safe_to_move.replace(false);
+        }
+        //NOTE: must record even when Unknown to ensure that Unknown
+        //  value is not confused with missing values for an iteration
+        //  that can be caused by conditionals within the loop.
+        self.record_memloc_at_bucket(bucket_id, addr_ty.clone(), val_result);
+    }
+}
+
+impl InterpreterObserver for EnvRecorder<'_> {
+    fn on_load_bucket(&self, bucket: &LoadBucket, env: &Env) -> bool {
+        if let Some(_) = bucket.bounded_fn {
+            todo!(); //not sure if/how to handle that
+        }
+        self.check(&bucket.id, &bucket.address_type, &bucket.src, env);
+        true
+    }
+
+    fn on_store_bucket(&self, bucket: &StoreBucket, env: &Env) -> bool {
+        if let Some(_) = bucket.bounded_fn {
+            todo!(); //not sure if/how to handle that
+        }
+        self.check(&bucket.id, &bucket.dest_address_type, &bucket.dest, env);
+        true
+    }
+
+    fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_compute_bucket(&self, _bucket: &ComputeBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_assert_bucket(&self, _bucket: &AssertBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_loop_bucket(&self, _bucket: &LoopBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_create_cmp_bucket(&self, _bucket: &CreateCmpBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_constraint_bucket(&self, _bucket: &ConstraintBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_block_bucket(&self, _bucket: &BlockBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_nop_bucket(&self, _bucket: &NopBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_location_rule(&self, _location_rule: &LocationRule, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_call_bucket(&self, _bucket: &CallBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_branch_bucket(&self, _bucket: &BranchBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_return_bucket(&self, _bucket: &ReturnBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_log_bucket(&self, _bucket: &LogBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn ignore_function_calls(&self) -> bool {
+        true
+    }
+
+    fn ignore_subcmp_calls(&self) -> bool {
+        true
+    }
+}
+
 pub struct LoopUnrollPass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
     replacements: RefCell<BTreeMap<BucketId, InstructionPointer>>,
-    new_functions: RefCell<Vec<FunctionCode>>,
+    new_body_functions: RefCell<Vec<FunctionCode>>,
 }
 
 impl LoopUnrollPass {
@@ -28,18 +235,235 @@ impl LoopUnrollPass {
         LoopUnrollPass {
             memory: PassMemory::new(prime, String::from(""), Default::default()),
             replacements: Default::default(),
-            new_functions: Default::default(),
+            new_body_functions: Default::default(),
         }
     }
 
-    fn extract_body(&self, bucket: &LoopBucket) -> String {
+    fn check_load_bucket(
+        &self,
+        bucket: &mut LoadBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        if let Some(x) = bucket_arg_order.remove(&bucket.id) {
+            // Update the destination information to reference the argument
+            //NOTE: This can't use AddressType::Variable or AddressType::Signal
+            //  because ExtractedFunctionLLVMIRProducer references the first two
+            //  parameters with those. So this has to use SubcmpSignal (it should
+            //  work fine because subcomps will also just be additional params).
+            bucket.address_type = AddressType::SubcmpSignal {
+                cmp_address: Self::new_u32_value(bucket, x),
+                uniform_parallel_value: None,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+            };
+            bucket.src = LocationRule::Indexed {
+                location: Self::new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+                template_header: None,
+            };
+        } else {
+            // If not replacing, check deeper in the AddressType and LocationRule
+            self.check_address_type(&mut bucket.address_type, bucket_arg_order);
+            self.check_location_rule(&mut bucket.src, bucket_arg_order);
+        }
+    }
+
+    fn check_store_bucket(
+        &self,
+        bucket: &mut StoreBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        // Check the source/RHS of the store in either case
+        self.check_instruction(&mut bucket.src, bucket_arg_order);
+        //
+        if let Some(x) = bucket_arg_order.remove(&bucket.id) {
+            // Update the destination information to reference the argument
+            bucket.dest_address_type = AddressType::SubcmpSignal {
+                cmp_address: Self::new_u32_value(bucket, x),
+                uniform_parallel_value: None,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+            };
+            bucket.dest = LocationRule::Indexed {
+                location: Self::new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+                template_header: None,
+            };
+        } else {
+            // If not replacing, check deeper in the AddressType and LocationRule
+            self.check_address_type(&mut bucket.dest_address_type, bucket_arg_order);
+            self.check_location_rule(&mut bucket.dest, bucket_arg_order);
+        }
+    }
+
+    fn check_location_rule(
+        &self,
+        location_rule: &mut LocationRule,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        match location_rule {
+            LocationRule::Indexed { location, .. } => {
+                self.check_instruction(location, bucket_arg_order);
+            }
+            LocationRule::Mapped { .. } => unreachable!(),
+        }
+    }
+
+    fn check_address_type(
+        &self,
+        addr_type: &mut AddressType,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        if let AddressType::SubcmpSignal { cmp_address, .. } = addr_type {
+            self.check_instruction(cmp_address, bucket_arg_order);
+        }
+    }
+
+    fn check_compute_bucket(
+        &self,
+        bucket: &mut ComputeBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        for i in &mut bucket.stack {
+            self.check_instruction(i, bucket_arg_order);
+        }
+    }
+
+    fn check_assert_bucket(
+        &self,
+        bucket: &mut AssertBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        self.check_instruction(&mut bucket.evaluate, bucket_arg_order);
+    }
+
+    fn check_loop_bucket(
+        &self,
+        bucket: &mut LoopBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_create_cmp_bucket(
+        &self,
+        bucket: &mut CreateCmpBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_constraint_bucket(
+        &self,
+        bucket: &mut ConstraintBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        self.check_instruction(
+            match bucket {
+                ConstraintBucket::Substitution(i) => i,
+                ConstraintBucket::Equality(i) => i,
+            },
+            bucket_arg_order,
+        );
+    }
+
+    fn check_block_bucket(
+        &self,
+        bucket: &mut BlockBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_call_bucket(
+        &self,
+        bucket: &mut CallBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_branch_bucket(
+        &self,
+        bucket: &mut BranchBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_return_bucket(
+        &self,
+        bucket: &mut ReturnBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        self.check_instruction(&mut bucket.value, bucket_arg_order);
+    }
+
+    fn check_log_bucket(
+        &self,
+        bucket: &mut LogBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        for arg in &mut bucket.argsprint {
+            if let LogBucketArg::LogExp(i) = arg {
+                self.check_instruction(i, bucket_arg_order);
+            }
+        }
+    }
+
+    //Nothing to do
+    fn check_value_bucket(&self, _: &mut ValueBucket, _: &mut BTreeMap<BucketId, usize>) {}
+    fn check_nop_bucket(&self, _: &mut NopBucket, _: &mut BTreeMap<BucketId, usize>) {}
+
+    fn check_instruction(
+        &self,
+        inst: &mut InstructionPointer,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        match inst.as_mut() {
+            Instruction::Value(ref mut b) => self.check_value_bucket(b, bucket_arg_order),
+            Instruction::Load(ref mut b) => self.check_load_bucket(b, bucket_arg_order),
+            Instruction::Store(ref mut b) => self.check_store_bucket(b, bucket_arg_order),
+            Instruction::Compute(ref mut b) => self.check_compute_bucket(b, bucket_arg_order),
+            Instruction::Call(ref mut b) => self.check_call_bucket(b, bucket_arg_order),
+            Instruction::Branch(ref mut b) => self.check_branch_bucket(b, bucket_arg_order),
+            Instruction::Return(ref mut b) => self.check_return_bucket(b, bucket_arg_order),
+            Instruction::Assert(ref mut b) => self.check_assert_bucket(b, bucket_arg_order),
+            Instruction::Log(ref mut b) => self.check_log_bucket(b, bucket_arg_order),
+            Instruction::Loop(ref mut b) => self.check_loop_bucket(b, bucket_arg_order),
+            Instruction::CreateCmp(ref mut b) => self.check_create_cmp_bucket(b, bucket_arg_order),
+            Instruction::Constraint(ref mut b) => self.check_constraint_bucket(b, bucket_arg_order),
+            Instruction::Block(ref mut b) => self.check_block_bucket(b, bucket_arg_order),
+            Instruction::Nop(ref mut b) => self.check_nop_bucket(b, bucket_arg_order),
+        }
+    }
+
+    fn extract_body(
+        &self,
+        bucket: &LoopBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) -> String {
+        // NOTE: must create parameter list before 'bucket_arg_order' is modified
+        let mut params = vec![
+            Param { name: String::from("lvars"), length: vec![0] },
+            Param { name: String::from("signals"), length: vec![0] },
+        ];
+        for i in 0..bucket_arg_order.len() {
+            params.push(Param { name: format!("fixed_{}", i), length: vec![0] });
+        }
+
         // Copy loop body and add a "return void" at the end
         let mut new_body = vec![];
         for s in &bucket.body {
-            let mut copy = s.clone();
+            let mut copy: InstructionPointer = s.clone();
+            if !bucket_arg_order.is_empty() {
+                //Traverse each cloned statement before calling `update_id()` and replace the
+                //  old location reference with reference to the proper argument. Mappings are
+                //  removed as they are processed so no change is needed once the map is empty.
+                self.check_instruction(&mut copy, bucket_arg_order);
+            }
             copy.update_id();
             new_body.push(copy);
         }
+        assert!(bucket_arg_order.is_empty());
         new_body.push(
             ReturnBucket {
                 id: new_id(),
@@ -52,142 +476,262 @@ impl LoopUnrollPass {
             .allocate(),
         );
         // Create new function to hold the copied body
-        let name = format!("{}loop.body.{}", GENERATED_FN_PREFIX, new_id());
+        // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
+        let func_name = format!("{}loop.body.{}", GENERATED_FN_PREFIX, new_id());
         let new_func = Box::new(FunctionCodeInfo {
             source_file_id: bucket.source_file_id,
             line: bucket.line,
-            name: name.clone(),
-            header: name.clone(),
+            name: func_name.clone(),
+            header: func_name.clone(),
             body: new_body,
-            params: vec![
-                Param { name: String::from("signals"), length: vec![0] },
-                Param { name: String::from("lvars"), length: vec![0] },
-            ],
-            returns: vec![0], // this will produce void return type on the function
+            params,
+            returns: vec![], // void return type on the function
             ..FunctionCodeInfo::default()
         });
-        self.memory.add_function(&new_func);
-        self.new_functions.borrow_mut().push(new_func);
-        //
-        name
+        // Store the function to be transformed and added to circuit later
+        self.new_body_functions.borrow_mut().push(new_func); 
+        func_name
+    }
+
+    fn new_u32_value(bucket: &dyn ObtainMeta, val: usize) -> InstructionPointer {
+        ValueBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            parse_as: ValueType::U32,
+            op_aux_no: 0,
+            value: val,
+        }
+        .allocate()
+    }
+    fn new_custom_fn_load_bucket(
+        bucket: &dyn ObtainMeta,
+        load_fun: &str,
+        addr_type: AddressType,
+        location: InstructionPointer,
+    ) -> InstructionPointer {
+        LoadBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            address_type: addr_type,
+            src: LocationRule::Indexed { location, template_header: None },
+            bounded_fn: Some(String::from(load_fun)),
+        }
+        .allocate()
+    }
+
+    fn new_storage_ptr_ref(bucket: &dyn ObtainMeta, addr_type: AddressType) -> InstructionPointer {
+        Self::new_custom_fn_load_bucket(
+            bucket,
+            FR_IDENTITY_ARR_PTR,
+            addr_type,
+            Self::new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+        )
+    }
+
+    //NOTE: When the 'bounded_fn' for LoadBucket is Some(_), the index parameter
+    //  is ignored so we must instead use `FR_INDEX_ARR_PTR` to apply the index.
+    //  Uses of that function can be inlined later.
+    // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
+    fn new_indexed_storage_ptr_ref(
+        bucket: &dyn ObtainMeta,
+        addr_type: AddressType,
+        index: usize,
+    ) -> InstructionPointer {
+        CallBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            symbol: String::from(FR_INDEX_ARR_PTR),
+            return_info: ReturnType::Intermediate { op_aux_no: 0 },
+            arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+            argument_types: vec![], // LLVM IR generation doesn't use this field
+            arguments: vec![
+                Self::new_storage_ptr_ref(bucket, addr_type),
+                Self::new_u32_value(bucket, index),
+            ],
+        }
+        .allocate()
+    }
+
+    /// Reports whether every element of `data` is equal to every other one.
+    ///
+    /// Only adjacent pairs are compared, which is sufficient because equality
+    /// on `usize` is transitive. Empty and single-element slices produce no
+    /// pairs and therefore yield `true`, matching the vacuous-truth reading
+    /// of "all the same".
+    ///
+    /// Used by `compute_extra_args` to assert that the per-iteration argument
+    /// vectors all have the same length.
+    fn is_all_same(data: &[usize]) -> bool {
+        // `windows(2)` yields each pair of neighbours exactly once.
+        data.windows(2).all(|pair| pair[0] == pair[1])
+    }
+
+    /// Decides which load/store buckets must receive their memory index as an extra argument.
+    /// Returns (iteration number -> argument values for that iteration, in argument order;
+    /// bucket ID -> position of that bucket's argument).
+    fn compute_extra_args(
+        recorder: &EnvRecorder,
+    ) -> (HashMap<usize, Vec<(AddressType, Value)>>, BTreeMap<BucketId, usize>) {
+        let mut iter_to_loc: HashMap<usize, Vec<(AddressType, Value)>> = HashMap::default();
+        let mut bucket_arg_order = BTreeMap::new();
+        let vpi = recorder.vals_per_iteration.borrow();
+        let iter_count = recorder.get_iter();
+        // Sort the IDs so argument positions do not depend on HashSet's per-process ordering.
+        let unique_ids: HashSet<&BucketId> =
+            vpi.values().flat_map(|x| x.loadstore_to_index.keys()).collect();
+        let mut all_loadstore_bucket_ids: Vec<&BucketId> = unique_ids.into_iter().collect();
+        all_loadstore_bucket_ids.sort_unstable();
+        for id in all_loadstore_bucket_ids {
+            // Check if the computed index value is the same across all iterations for this BucketId.
+            //  If it is not the same in all iterations, then it needs to be passed as a separate
+            //  parameter to the new function.
+            // NOTE: Some iterations of the loop may have no mapping for certain BucketIds because
+            //  conditional branches can make certain buckets unused in some iterations. Those
+            //  iterations are skipped when comparing. This is the reason it was important to store
+            //  Unknown values in `loadstore_to_index` as well, so they are not confused with
+            //  missing values.
+            let mut next_iter_to_store = 0;
+            let mut prev_val: Option<&(AddressType, Value)> = None;
+            for curr_iter in 0..iter_count {
+                let curr = match vpi[&curr_iter].loadstore_to_index.get(id) {
+                    Some(pair) => pair,
+                    None => continue, //bucket was not reached in this iteration
+                };
+                // In the initial state the first value is compared with itself (a no-op).
+                let prev = *prev_val.get_or_insert(curr);
+                assert_eq!(prev.0, curr.0); //AddressType always matches
+                if !Value::eq(&prev.1, &curr.1) {
+                    assert!(!prev.1.is_unknown() && !curr.1.is_unknown());
+                    // Store previous Value for all iterations that did have the same
+                    //  value (or None) and have not yet been stored.
+                    for j in next_iter_to_store..curr_iter {
+                        iter_to_loc.entry(j).or_default().push(prev.clone());
+                    }
+                    // Store current Value for current iteration
+                    iter_to_loc.entry(curr_iter).or_default().push(curr.clone());
+                    next_iter_to_store = curr_iter + 1;
+                    prev_val = Some(curr);
+                }
+            }
+            if next_iter_to_store != 0 {
+                // The index changed at least once so it becomes an argument. Iterations after
+                //  the last change reuse the latest value so every iteration gets an entry.
+                for j in next_iter_to_store..iter_count {
+                    iter_to_loc.entry(j).or_default().extend(prev_val.cloned());
+                }
+                bucket_arg_order.insert(*id, bucket_arg_order.len());
+            }
+            //ASSERT: All vectors have the same length at the end of each iteration
+            assert!(Self::is_all_same(&iter_to_loc.values().map(|x| x.len()).collect::<Vec<_>>()));
+        }
+        (iter_to_loc, bucket_arg_order)
+    }
 
     fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
+        // {
+        //     println!("\nTry unrolling loop {}:", bucket.id); //TODO: TEMP
+        //     for (i, s) in bucket.body.iter().enumerate() {
+        //         println!("[{}/{}]{}", i + 1, bucket.body.len(), s.to_sexp().to_pretty(100));
+        //     }
+        //     for (i, s) in bucket.body.iter().enumerate() {
+        //         println!("[{}/{}]{:?}", i + 1, bucket.body.len(), s);
+        //     }
+        //     println!("LOOP ENTRY env {}", env); //TODO: TEMP
+        // }
         // Compute loop iteration count. If unknown, return immediately.
-        let loop_count;
+        let recorder = EnvRecorder::new(&self.memory);
         {
-            let mut iters = 0;
-            let interpreter = self.memory.build_interpreter(self);
+            //TODO: This has the wrong scope if an inner function w/ fixed params will be processed! Need test case for it.
+            //  Can't make it crash. Maybe it's not activating in current setup, it was only when I tried to process the other functions?
+            let interpreter = self.memory.build_interpreter(&recorder);
             let mut inner_env = env.clone();
             loop {
+                recorder.record_env_at_header(inner_env.clone());
                 let (_, cond, new_env) =
-                    interpreter.execute_loop_bucket_once(bucket, inner_env, false);
+                    interpreter.execute_loop_bucket_once(bucket, inner_env, true);
                 match cond {
                     // If the conditional becomes unknown just give up.
                     None => return (None, 0),
                     // When conditional becomes `false`, iteration count is complete.
                     Some(false) => break,
                     // Otherwise, continue counting.
-                    Some(true) => iters += 1,
-                }
+                    Some(true) => recorder.increment_iter(),
+                };
                 inner_env = new_env;
             }
-            loop_count = iters;
         }
+        // println!("recorder = {:?}", recorder); //TODO: TEMP
 
-        // If the loop body contains more than one instruction, extract it into a
-        // new function and generate 'loop_count' number of calls to that function.
-        // Otherwise, just duplicate the body 'loop_count' number of times.
         let mut block_body = vec![];
-        match &bucket.body[..] {
-            [a] => {
-                for _ in 0..loop_count {
-                    let mut copy = a.clone();
-                    copy.update_id();
-                    block_body.push(copy);
+        if recorder.is_safe_to_move() {
+            // If the loop body contains more than one instruction, extract it into a new
+            // function and generate 'recorder.get_iter()' number of calls to that function.
+            // Otherwise, just duplicate the body 'recorder.get_iter()' number of times.
+            match &bucket.body[..] {
+                [a] => {
+                    for _ in 0..recorder.get_iter() {
+                        let mut copy = a.clone();
+                        copy.update_id();
+                        block_body.push(copy);
+                    }
                 }
-            }
-            b => {
-                assert!(b.len() > 1);
-                //
-                //TODO: If any subcmps are used inside the loop body, add an additional '[0 x i256]*' parameter on
-                //  the new function for each one that is used and pass the arena of each into the function call.
-                //
-                //TODO: Any value indexed by a variable that changes from one loop iteration to another needs to
-                //  be indexed outside of the function and then have just that pointer passed into the function.
-                //
-                let name = self.extract_body(bucket);
-                for _ in 0..loop_count {
-                    block_body.push(
+                b => {
+                    assert!(b.len() > 1);
+                    let (iter_to_loc, mut bucket_arg_order) = Self::compute_extra_args(&recorder);
+                    let name = self.extract_body(bucket, &mut bucket_arg_order);
+                    for iter_num in 0..recorder.get_iter() {
                         // NOTE: CallBucket arguments must use a LoadBucket to reference the necessary pointers
                         //  within the current body. However, it doesn't actually need to generate a load
                         //  instruction to use these pointers as parameters to the function so we must use the
                         //  `bounded_fn` field of the LoadBucket to specify the identity function to perform
                         //  the "loading" (but really it just returns the pointer that was passed in).
-                        CallBucket {
-                            id: new_id(),
-                            source_file_id: bucket.source_file_id,
-                            line: bucket.line,
-                            message_id: bucket.message_id,
-                            symbol: name.clone(),
-                            return_info: ReturnType::Intermediate { op_aux_no: 0 },
-                            arena_size: 0, // size 0 indicates arguments should not be placed into an arena
-                            argument_types: vec![], // LLVM IR generation doesn't use this field
-                            arguments: vec![
-                                // Parameter for signals/arena
-                                LoadBucket {
-                                    id: new_id(),
-                                    source_file_id: bucket.source_file_id,
-                                    line: bucket.line,
-                                    message_id: bucket.message_id,
-                                    address_type: AddressType::Signal,
-                                    src: LocationRule::Indexed {
-                                        location: ValueBucket {
-                                            id: new_id(),
-                                            source_file_id: bucket.source_file_id,
-                                            line: bucket.line,
-                                            message_id: bucket.message_id,
-                                            parse_as: ValueType::U32,
-                                            op_aux_no: 0,
-                                            value: 0,
-                                        }
-                                        .allocate(),
-                                        template_header: None,
-                                    },
-                                    bounded_fn: Some(String::from(FR_IDENTITY_ARR_0_PTR)),
-                                }
-                                .allocate(),
-                                // Parameter for local vars
-                                LoadBucket {
-                                    id: new_id(),
-                                    source_file_id: bucket.source_file_id,
-                                    line: bucket.line,
-                                    message_id: bucket.message_id,
-                                    address_type: AddressType::Variable,
-                                    src: LocationRule::Indexed {
-                                        location: ValueBucket {
-                                            id: new_id(),
-                                            source_file_id: bucket.source_file_id,
-                                            line: bucket.line,
-                                            message_id: bucket.message_id,
-                                            parse_as: ValueType::U32,
-                                            op_aux_no: 0,
-                                            value: 0,
-                                        }
-                                        .allocate(),
-                                        template_header: None,
-                                    },
-                                    bounded_fn: Some(String::from(FR_IDENTITY_ARR_0_PTR)),
-                                }
-                                .allocate(),
-                            ],
+                        let mut args = InstructionList::default();
+                        // Parameter for local vars
+                        args.push(Self::new_storage_ptr_ref(bucket, AddressType::Variable));
+                        // Parameter for signals/arena
+                        args.push(Self::new_storage_ptr_ref(bucket, AddressType::Signal));
+                        //Additional parameters for variant vector/array access within the loop
+                        for a in &iter_to_loc[&iter_num] {
+                            args.push(Self::new_indexed_storage_ptr_ref(
+                                bucket,
+                                a.0.clone(),
+                                a.1.get_u32(),
+                            ));
                         }
-                        .allocate(),
-                    );
+                        block_body.push(
+                            CallBucket {
+                                id: new_id(),
+                                source_file_id: bucket.source_file_id,
+                                line: bucket.line,
+                                message_id: bucket.message_id,
+                                symbol: name.clone(),
+                                return_info: ReturnType::Intermediate { op_aux_no: 0 },
+                                arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+                                argument_types: vec![], // LLVM IR generation doesn't use this field
+                                arguments: args,
+                            }
+                            .allocate(),
+                        );
+                    }
+                }
+            }
+        } else {
+            //If the loop body is not safe to move into a new function, just unroll.
+            for _ in 0..recorder.get_iter() {
+                for s in &bucket.body {
+                    let mut copy = s.clone();
+                    copy.update_id();
+                    block_body.push(copy);
                 }
             }
         }
-        (Some(block_body), loop_count)
+        (Some(block_body), recorder.get_iter())
     }
 
     // Will take the unrolled loop and interpretate it
@@ -290,7 +834,8 @@ impl CircuitTransformationPass for LoopUnrollPass {
     }
 
     fn post_hook_circuit(&self, cir: &mut Circuit) {
-        for f in self.new_functions.borrow().iter() {
+        // Transform and add the new body functions
+        for f in self.new_body_functions.borrow().iter() {
             cir.functions.push(self.transform_function(&f));
         }
     }
diff --git a/circuit_passes/src/passes/memory.rs b/circuit_passes/src/passes/memory.rs
index 25d96d763..f20872a6e 100644
--- a/circuit_passes/src/passes/memory.rs
+++ b/circuit_passes/src/passes/memory.rs
@@ -148,7 +148,7 @@ impl ContextSwitcher for PassMemory {
         interpreter: &'a BucketInterpreter<'a>,
         scope: &'a String,
     ) -> BucketInterpreter<'a> {
-        self.build_interpreter_with_scope(interpreter.observer, scope.to_string())
+        self.build_interpreter_with_scope(interpreter.observer, scope.clone())
     }
 }
 
diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index 3b43d154e..ee64c04df 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -1,16 +1,19 @@
+use inkwell::attributes::{Attribute, AttributeLoc};
+use inkwell::values::FunctionValue;
+
 use crate::llvm_elements::LLVMIRProducer;
-use crate::llvm_elements::functions::create_bb;
-use crate::llvm_elements::functions::create_function;
+use crate::llvm_elements::functions::{create_bb, create_function};
 use crate::llvm_elements::instructions::{
     create_add, create_sub, create_mul, create_div, create_mod, create_pow, create_eq, create_neq,
-    create_lt, create_gt, create_le, create_ge, create_neg, create_shl, create_shr, create_bit_and,
-    create_bit_or, create_bit_xor, create_logic_and, create_logic_or, create_logic_not,
-    create_return, create_cast_to_addr,
+    create_lt, create_gt, create_le, create_ge, create_gep, create_neg, create_shl, create_shr,
+    create_bit_and, create_bit_or, create_bit_xor, create_logic_and, create_logic_or,
+    create_logic_not, create_return, create_cast_to_addr,
 };
 use crate::llvm_elements::types::{bigint_type, bool_type, i32_type, void_type};
 
 use super::instructions::create_array_copy;
 use super::instructions::{create_inv, create_return_void};
+use super::values::zero;
 
 pub const FR_ADD_FN_NAME: &str = "fr_add";
 pub const FR_SUB_FN_NAME: &str = "fr_sub";
@@ -37,7 +40,8 @@ pub const FR_LOR_FN_NAME: &str = "fr_logic_or";
 pub const FR_LNOT_FN_NAME: &str = "fr_logic_not";
 pub const FR_ADDR_CAST_FN_NAME: &str = "fr_cast_to_addr";
 pub const FR_ARRAY_COPY_FN_NAME: &str = "fr_copy_n";
-pub const FR_IDENTITY_ARR_0_PTR: &str = "identity_arr_0_ptr";
+pub const FR_IDENTITY_ARR_PTR: &str = "identity_arr_ptr";
+pub const FR_INDEX_ARR_PTR: &str = "index_arr_ptr";
 
 macro_rules! fr_unary_op_base {
     ($name: expr, $producer: expr, $argTy: expr, $retTy: expr) => {{
@@ -90,26 +94,35 @@ macro_rules! fr_binary_op_bigint_to_bool {
     }};
 }
 
-pub fn add_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+/// Attaches the enum attribute named "alwaysinline" to `func` at function scope
+/// (`AttributeLoc::Function`); the attribute kind id is looked up by name.
+fn add_inline_attribute<'a>(producer: &dyn LLVMIRProducer<'a>, func: FunctionValue) {
+    let kind_id = Attribute::get_named_enum_kind_id("alwaysinline");
+    // Build the attribute (value 1) through the producer's context, then attach it.
+    let inline_attr = producer.context().create_enum_attribute(kind_id, 1);
+    func.add_attribute(AttributeLoc::Function, inline_attr);
+}
+
+fn add_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_ADD_FN_NAME, producer);
     let add = create_add(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, add.into_int_value());
 }
 
-pub fn sub_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn sub_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_SUB_FN_NAME, producer);
     let add = create_sub(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, add.into_int_value());
 }
 
-pub fn mul_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn mul_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_MUL_FN_NAME, producer);
     let add = create_mul(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, add.into_int_value());
 }
 
 // Multiplication by the inverse
-pub fn div_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn div_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_DIV_FN_NAME, producer);
     let inv = create_inv(producer, rhs.into_int_value());
     let res = create_mul(producer, lhs.into_int_value(), inv.into_int_value());
@@ -117,20 +130,20 @@ pub fn div_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
 }
 
 // Quotient of the integer division
-pub fn intdiv_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn intdiv_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_INTDIV_FN_NAME, producer);
     let res = create_div(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
 // Remainder of the integer division
-pub fn mod_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn mod_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_MOD_FN_NAME, producer);
     let div = create_mod(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, div.into_int_value());
 }
 
-pub fn pow_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn pow_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_POW_FN_NAME, producer);
     let f = producer
         .llvm()
@@ -141,79 +154,79 @@ pub fn pow_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return(producer, res.into_int_value());
 }
 
-pub fn eq_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn eq_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_EQ_FN_NAME, producer);
     let eq = create_eq(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, eq.into_int_value());
 }
 
-pub fn neq_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn neq_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_NEQ_FN_NAME, producer);
     let neq = create_neq(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, neq.into_int_value());
 }
 
-pub fn lt_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn lt_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_LT_FN_NAME, producer);
     let res = create_lt(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn gt_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn gt_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_GT_FN_NAME, producer);
     let res = create_gt(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn le_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn le_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_LE_FN_NAME, producer);
     let res = create_le(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn ge_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn ge_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_GE_FN_NAME, producer);
     let res = create_ge(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn neg_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn neg_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let arg = fr_unary_op!(FR_NEG_FN_NAME, producer, bigint_type(producer));
     let neg = create_neg(producer, arg.into_int_value());
     create_return(producer, neg.into_int_value());
 }
 
-pub fn shl_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn shl_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_SHL_FN_NAME, producer);
     let res = create_shl(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn shr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn shr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_SHR_FN_NAME, producer);
     let res = create_shr(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn bit_and_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn bit_and_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_BITAND_FN_NAME, producer);
     let res = create_bit_and(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn bit_or_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn bit_or_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_BITOR_FN_NAME, producer);
     let res = create_bit_or(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn bit_xor_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn bit_xor_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_BITXOR_FN_NAME, producer);
     let res = create_bit_xor(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn bit_flip_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn bit_flip_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let ty = bigint_type(producer);
     let arg = fr_unary_op!(FR_BITFLIP_FN_NAME, producer, ty);
     // ~x <=> xor(x, 0xFF...)
@@ -221,25 +234,25 @@ pub fn bit_flip_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return(producer, res.into_int_value());
 }
 
-pub fn logic_and_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn logic_and_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bool!(FR_LAND_FN_NAME, producer);
     let res = create_logic_and(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn logic_or_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn logic_or_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bool!(FR_LOR_FN_NAME, producer);
     let res = create_logic_or(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn logic_not_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn logic_not_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let arg = fr_unary_op!(FR_LNOT_FN_NAME, producer, bool_type(producer));
     let res = create_logic_not(producer, arg.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn addr_cast_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn addr_cast_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let arg = fr_unary_op_base!(
         FR_ADDR_CAST_FN_NAME,
         producer,
@@ -250,7 +263,7 @@ pub fn addr_cast_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return(producer, res.into_int_value());
 }
 
-pub fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let ptr_ty = bigint_type(producer).ptr_type(Default::default());
     let args = &[ptr_ty.into(), ptr_ty.into(), i32_type(producer).into()];
     let func = create_function(
@@ -272,22 +285,50 @@ pub fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return_void(producer);
 }
 
-pub fn identity_arr0_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn identity_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let val_type = bigint_type(producer).array_type(0).ptr_type(Default::default());
     let func = create_function(
         producer,
         &None,
         0,
         "",
-        FR_IDENTITY_ARR_0_PTR,
+        FR_IDENTITY_ARR_PTR,
         val_type.fn_type(&[val_type.into()], false),
     );
-    let main = create_bb(producer, func, FR_IDENTITY_ARR_0_PTR);
+    add_inline_attribute(producer, func);
+
+    let main = create_bb(producer, func, FR_IDENTITY_ARR_PTR);
     producer.set_current_bb(main);
     // Just return the parameter
     create_return(producer, func.get_nth_param(0).unwrap());
 }
 
+fn index_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let bigint_ty = bigint_type(producer);
+    let val_ty = bigint_ty.array_type(0).ptr_type(Default::default());
+    let func = create_function(
+        producer,
+        &None,
+        0,
+        "",
+        FR_INDEX_ARR_PTR,
+        val_ty.fn_type(&[val_ty.into(), bigint_ty.into()], false),
+    );
+    add_inline_attribute(producer, func);
+    // Generated signature: (arr: array pointer, idx: bigint) -> array pointer at arr[idx].
+    let arr = func.get_nth_param(0).unwrap();
+    let idx = func.get_nth_param(1).unwrap();
+    arr.set_name("arr");
+    idx.set_name("idx");
+    // Label the entry block with this function's own name, not the identity helper's.
+    let main = create_bb(producer, func, FR_INDEX_ARR_PTR);
+    producer.set_current_bb(main);
+    let gep =
+        create_gep(producer, arr.into_pointer_value(), &[zero(producer), idx.into_int_value()]);
+    let cast = producer.llvm().builder.build_bitcast(gep.into_pointer_value(), val_ty, "");
+    create_return(producer, cast.into_pointer_value());
+}
+
 pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     add_fn(producer);
     sub_fn(producer);
@@ -313,6 +354,7 @@ pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     logic_not_fn(producer);
     addr_cast_fn(producer);
     array_copy_fn(producer);
-    identity_arr0_ptr_fn(producer);
+    identity_arr_ptr_fn(producer);
+    index_arr_ptr_fn(producer);
     pow_fn(producer); //uses functions generated by mul_fn & lt_fn
 }
diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index 2567bdbd0..c915cf4f0 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -135,25 +135,36 @@ impl<'ctx, 'prod> LLVMIRProducer<'ctx> for FunctionLLVMIRProducer<'ctx, 'prod> {
 
 struct ExtractedFunctionCtx<'a> {
     current_function: FunctionValue<'a>,
-    //TODO: will have at least the signals-arena [0 x i256]* and lvars [0 x i256]* from the template and possible subcmps
-    signals: PointerValue<'a>,
     lvars: PointerValue<'a>,
+    signals: Option<PointerValue<'a>>,
+    other: Vec<PointerValue<'a>>,
 }
 
 impl<'a> ExtractedFunctionCtx<'a> {
     fn new(current_function: FunctionValue<'a>) -> Self {
+        // NOTE: The 'lvars' [0 x i256]* parameter must always be present.
+        //  The 'signals' [0 x i256]* parameter is optional (to allow this to
+        //  handle the generated array index load functions for the unroller).
         ExtractedFunctionCtx {
             current_function,
-            signals: current_function
-                .get_nth_param(0)
-                .expect("Function must have at least 1 argument for signal array!")
-                .into_pointer_value(),
             lvars: current_function
-                .get_nth_param(1)
-                .expect("Function must have at least 2 arguments for signal and lvar arrays!")
+                .get_nth_param(0)
+                .expect("Function must have at least 1 argument for lvar array!")
                 .into_pointer_value(),
+            signals: current_function.get_nth_param(1).map(|x| x.into_pointer_value()),
+            other: current_function
+                .get_param_iter()
+                .skip(2)
+                .map(|x| x.into_pointer_value())
+                .collect::<Vec<_>>(),
         }
     }
+
+    fn get_signals_ptr(&self) -> PointerValue<'a> {
+        // Panic message is formatted lazily, only when no 'signals' parameter exists.
+        let func = &self.current_function;
+        self.signals.unwrap_or_else(|| panic!("No signals argument for {:?}", func.get_name()))
+    }
 }
 
 impl<'a> BodyCtx<'a> for ExtractedFunctionCtx<'a> {
@@ -174,40 +185,35 @@ impl<'a> BodyCtx<'a> for ExtractedFunctionCtx<'a> {
 impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
     fn load_subcmp(
         &self,
-        producer: &dyn LLVMIRProducer<'a>,
-        id: AnyValueEnum<'a>,
+        _producer: &dyn LLVMIRProducer<'a>,
+        _id: AnyValueEnum<'a>,
     ) -> PointerValue<'a> {
-        todo!();
+        //NOTE: only used by CreateCmpBucket::produce_llvm_ir
+        //TODO: I think instead of ID defining an array index in the gep, it will need to define a static index
+        //  in an array of subcomponents in this context (i.e. self.subcmps[id] with offsets [0,0]).
+        todo!("load_subcmp {} from {:?}", _id, self.other);
         //create_gep(producer, self.subcmps, &[zero(producer), id.into_int_value()]).into_pointer_value()
     }
 
     fn load_subcmp_addr(
         &self,
-        producer: &dyn LLVMIRProducer<'a>,
+        _producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
     ) -> PointerValue<'a> {
-        todo!();
-        // let signals = create_gep(
-        //     producer,
-        //     self.subcmps,
-        //     &[zero(producer), id.into_int_value(), zero(producer)],
-        // )
-        // .into_pointer_value();
-        // create_load(producer, signals).into_pointer_value()
+        let num = id
+            .into_int_value()
+            .get_zero_extended_constant()
+            .expect("must reference a constant argument index");
+        *self.other.get(num as usize).expect("must reference a known argument index")
     }
 
     fn load_subcmp_counter(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
-        id: AnyValueEnum<'a>,
+        _id: AnyValueEnum<'a>,
     ) -> PointerValue<'a> {
-        todo!();
-        // create_gep(
-        //     producer,
-        //     self.subcmps,
-        //     &[zero(producer), id.into_int_value(), create_literal_u32(producer, 1)],
-        // )
-        // .into_pointer_value()
+        // Use null pointer to force StoreBucket::produce_llvm_ir to skip counter increment
+        producer.context().i32_type().ptr_type(Default::default()).const_null()
     }
 
     fn get_signal(
@@ -216,11 +222,11 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
         index: IntValue<'a>,
     ) -> AnyValueEnum<'a> {
         //'gep' must read through the pointer with 0 and then index the array
-        create_gep(producer, self.signals, &[zero(producer), index])
+        create_gep(producer, self.get_signals_ptr(), &[zero(producer), index])
     }
 
     fn get_signal_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
-        self.signals.into()
+        self.get_signals_ptr().into()
     }
 }
 
diff --git a/code_producers/src/llvm_elements/instructions.rs b/code_producers/src/llvm_elements/instructions.rs
index f1f85ada6..9e08d7292 100644
--- a/code_producers/src/llvm_elements/instructions.rs
+++ b/code_producers/src/llvm_elements/instructions.rs
@@ -537,21 +537,22 @@ pub fn ensure_int_type_match<'a>(
     val: IntValue<'a>,
     ty: IntType<'a>,
 ) -> IntValue<'a> {
-    if val.get_type() == ty {
+    let val_ty = val.get_type();
+    if val_ty == ty {
         // No conversion needed
         val
-    } else if val.get_type() == bool_type(producer) {
+    } else if val_ty == bool_type(producer) {
         // Zero extend
         producer.llvm().builder.build_int_z_extend(val, ty, "")
     } else if ty == bool_type(producer) {
         // Convert to bool
         ensure_bool(producer, val).into_int_value()
+    } else if val_ty.get_bit_width() < ty.get_bit_width() {
+        producer.llvm().builder.build_int_s_extend(val, ty, "")
     } else {
         panic!(
             "Unhandled int conversion of value '{:?}': {:?} to {:?} not supported!",
-            val,
-            val.get_type(),
-            ty
+            val, val_ty, ty
         )
     }
 }
diff --git a/compiler/src/circuit_design/circuit.rs b/compiler/src/circuit_design/circuit.rs
index 10a5d1717..8232758aa 100644
--- a/compiler/src/circuit_design/circuit.rs
+++ b/compiler/src/circuit_design/circuit.rs
@@ -45,7 +45,10 @@ impl Default for Circuit {
 }
 
 impl WriteLLVMIR for Circuit {
-    fn produce_llvm_ir<'a, 'b>(&self, producer: &'b dyn LLVMIRProducer<'a>) -> Option<LLVMInstruction<'a>> {
+    fn produce_llvm_ir<'a, 'b>(
+        &self,
+        producer: &'b dyn LLVMIRProducer<'a>,
+    ) -> Option<LLVMInstruction<'a>> {
         // Code for prelude
 
         // Code for standard library?
@@ -54,7 +57,11 @@ impl WriteLLVMIR for Circuit {
 
         // Generate all the switch functions
         let mut ranges = HashSet::new();
-        let mappings = [&self.llvm_data.signal_index_mapping, &self.llvm_data.variable_index_mapping, &self.llvm_data.component_index_mapping];
+        let mappings = [
+            &self.llvm_data.signal_index_mapping,
+            &self.llvm_data.variable_index_mapping,
+            &self.llvm_data.component_index_mapping,
+        ];
 
         for mapping in mappings {
             for range_mapping in mapping.values() {
@@ -103,13 +110,34 @@ impl WriteLLVMIR for Circuit {
                 f.get_line(),
                 f.name.as_str(),
                 name,
-                if f.returns.is_empty() || (f.returns.len() == 1 && *f.returns.get(0).unwrap() == 1)
-                {
-                    bigint_type(producer).fn_type(&param_types, false)
+                if f.returns.len() == 1 {
+                    let single_size = *f.returns.get(0).unwrap();
+                    if single_size == 0 {
+                        //single dimension of size 0 indicates [0 x i256]* should be used
+                        bigint_type(producer)
+                            .array_type(0)
+                            .ptr_type(Default::default())
+                            .fn_type(&param_types, false)
+                    } else if single_size == 1 {
+                        // single dimension of size 1 is a scalar
+                        bigint_type(producer).fn_type(&param_types, false)
+                    } else {
+                        // single dimension size>1 must return via pointer argument
+                        void_type(producer).fn_type(&param_types, false)
+                    }
                 } else {
+                    // multiple dimensions must return via pointer argument
                     void_type(producer).fn_type(&param_types, false)
                 },
             );
+
+            // Preserve names (only for generated b/c source functions use only 1 argument)
+            if name.starts_with(GENERATED_FN_PREFIX) {
+                for (i, p) in f.params.iter().enumerate() {
+                    function.get_nth_param(i as u32).unwrap().set_name(&p.name);
+                }
+            }
+
             funcs.insert(name, function);
         }
 
diff --git a/compiler/src/intermediate_representation/store_bucket.rs b/compiler/src/intermediate_representation/store_bucket.rs
index 73982ebe4..7d72dfed2 100644
--- a/compiler/src/intermediate_representation/store_bucket.rs
+++ b/compiler/src/intermediate_representation/store_bucket.rs
@@ -172,10 +172,12 @@ impl StoreBucket{
         if let AddressType::SubcmpSignal { cmp_address, .. } = &dest_address_type {
             let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
             let counter = producer.template_ctx().load_subcmp_counter(producer, addr);
-            let value = create_load_with_name(producer, counter, "load.subcmp.counter");
-            let new_value = create_sub_with_name(producer, value.into_int_value(), create_literal_u32(producer, 1), "decrement.counter");
-            assert_eq!(1, context.size, "unhandled array store");
-            create_store(producer, counter, new_value);
+            if !counter.is_null() {
+                let value = create_load_with_name(producer, counter, "load.subcmp.counter");
+                let new_value = create_sub_with_name(producer, value.into_int_value(), create_literal_u32(producer, 1), "decrement.counter");
+                assert_eq!(1, context.size, "unhandled array store");
+                create_store(producer, counter, new_value);
+            }
         }
 
         let sub_cmp_name = match &dest {

From 539f6baa59f88e308f8b0e4b40d388e705a8ab85 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 6 Sep 2023 10:15:34 -0500
Subject: [PATCH 08/42] [VAN-609] inline the array indexing/identity functions

---
 code_producers/src/llvm_elements/mod.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/code_producers/src/llvm_elements/mod.rs b/code_producers/src/llvm_elements/mod.rs
index 30af4796b..e7beb0cb8 100644
--- a/code_producers/src/llvm_elements/mod.rs
+++ b/code_producers/src/llvm_elements/mod.rs
@@ -11,6 +11,7 @@ use inkwell::builder::Builder;
 use inkwell::context::{Context, ContextRef};
 use inkwell::debug_info::{DebugInfoBuilder, DICompileUnit};
 use inkwell::module::Module;
+use inkwell::passes::PassManager;
 use inkwell::types::{AnyTypeEnum, BasicType, BasicTypeEnum, IntType};
 use inkwell::values::{ArrayValue, BasicMetadataValueEnum, BasicValueEnum, IntValue, PointerValue};
 pub use inkwell::types::AnyType;
@@ -316,6 +317,10 @@ impl<'a> LLVM<'a> {
     }
 
     pub fn write_to_file(&self, path: &str) -> Result<(), ()> {
+        let pm = PassManager::create(());
+        pm.add_always_inliner_pass();
+        pm.run_on(&self.module);
+
         // Must finalize all debug info before running the verifier
         for dbg in self.debug.values() {
             dbg.0.finalize();

From fb638f5e3364689c3632e60744ce97c08f4976cd Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 6 Sep 2023 12:31:14 -0500
Subject: [PATCH 09/42] fix scalar return source functions

---
 circuit_passes/src/passes/loop_unroll.rs | 11 ++++++++++-
 compiler/src/circuit_design/circuit.rs   |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
index a21a4e11b..1c32872d3 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll.rs
@@ -489,7 +489,7 @@ impl LoopUnrollPass {
             ..FunctionCodeInfo::default()
         });
         // Store the function to be transformed and added to circuit later
-        self.new_body_functions.borrow_mut().push(new_func); 
+        self.new_body_functions.borrow_mut().push(new_func);
         func_name
     }
 
@@ -834,6 +834,15 @@ impl CircuitTransformationPass for LoopUnrollPass {
     }
 
     fn post_hook_circuit(&self, cir: &mut Circuit) {
+        // Normalize return type on source functions for "WriteLLVMIR for Circuit"
+        //  which treats a 1-D vector of size 1 as a scalar return and an empty
+        //  vector as "void" return type (the initial Circuit builder uses empty
+        //  for scalar returns because it doesn't consider "void" return possible).
+        for f in &mut cir.functions {
+            if f.returns.is_empty() {
+                f.returns = vec![1];
+            }
+        }
         // Transform and add the new body functions
         for f in self.new_body_functions.borrow().iter() {
             cir.functions.push(self.transform_function(&f));
diff --git a/compiler/src/circuit_design/circuit.rs b/compiler/src/circuit_design/circuit.rs
index 8232758aa..f80ba5b0d 100644
--- a/compiler/src/circuit_design/circuit.rs
+++ b/compiler/src/circuit_design/circuit.rs
@@ -127,6 +127,7 @@ impl WriteLLVMIR for Circuit {
                     }
                 } else {
                     // multiple dimensions must return via pointer argument
+                    //  and zero dimensions indicates void return
                     void_type(producer).fn_type(&param_types, false)
                 },
             );

From bbf265872ca7273ad61287850d96c101449d6f65 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 7 Sep 2023 11:20:02 -0500
Subject: [PATCH 10/42] fix tests other than nested loops and subcomps

---
 Cargo.lock                                    |  21 +++
 circom/tests/loops/call_inside_loop.circom    |   2 +-
 circom/tests/loops/fib_input.circom           |   2 +-
 circom/tests/loops/fib_template.circom        |   2 +-
 circom/tests/loops/for_known.circom           |   2 +-
 circom/tests/loops/for_unknown.circom         |   2 +-
 circom/tests/loops/for_unknown_index.circom   |   2 +-
 circom/tests/loops/inner_loop_simple.circom   |   6 +-
 circom/tests/loops/inner_loops.circom         |   7 +-
 circom/tests/loops/inner_loops2.circom        |   2 +-
 circom/tests/loops/inner_loops3.circom        |   2 +-
 circom/tests/loops/inner_loops4.circom        |   2 +-
 circom/tests/loops/inner_loops5.circom        |  21 +++
 circom/tests/loops/inner_loops6.circom        |  69 +++++++
 circom/tests/loops/known_function.circom      |   2 +-
 circom/tests/loops/known_signal_value.circom  |   2 +-
 circom/tests/loops/simple_variant_idx.circom  |  77 ++++++++
 .../loops/unknown_index_from_array.circom     |  26 +++
 .../loops/unknown_index_from_function.circom  |  35 ++++
 .../loops/unknown_local_array_index.circom    |   2 +-
 .../tests/loops/unknown_loop_component.circom |   2 +-
 circom/tests/loops/unknown_loop_index.circom  |   2 +-
 circom/tests/loops/unknown_loop_oob.circom    |   2 +-
 circom/tests/loops/vanguard-uc-comp.circom    | 168 +++++++++++-------
 .../tests/loops/variant_idx_in_loop_A.circom  |  55 ++++++
 .../tests/loops/variant_idx_in_loop_B.circom  |  73 ++++++++
 .../tests/loops/variant_idx_in_loop_C.circom  |  29 +++
 circuit_passes/Cargo.toml                     |   3 +-
 circuit_passes/src/bucket_interpreter/env.rs  |  14 ++
 circuit_passes/src/bucket_interpreter/mod.rs  |  34 ++--
 circuit_passes/src/passes/loop_unroll.rs      |  23 +--
 circuit_passes/src/passes/mod.rs              |   3 +
 code_producers/src/llvm_elements/fr.rs        |   2 +-
 33 files changed, 584 insertions(+), 112 deletions(-)
 create mode 100644 circom/tests/loops/inner_loops5.circom
 create mode 100644 circom/tests/loops/inner_loops6.circom
 create mode 100644 circom/tests/loops/simple_variant_idx.circom
 create mode 100644 circom/tests/loops/unknown_index_from_array.circom
 create mode 100644 circom/tests/loops/unknown_index_from_function.circom
 create mode 100644 circom/tests/loops/variant_idx_in_loop_A.circom
 create mode 100644 circom/tests/loops/variant_idx_in_loop_B.circom
 create mode 100644 circom/tests/loops/variant_idx_in_loop_C.circom

diff --git a/Cargo.lock b/Cargo.lock
index 8fac819e5..27269ad96 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -218,6 +218,7 @@ dependencies = [
  "circom_algebra",
  "code_producers",
  "compiler",
+ "const_format",
  "constraint_generation",
  "intervallum",
  "program_structure",
@@ -285,6 +286,26 @@ dependencies = [
  "rand 0.8.5",
 ]
 
+[[package]]
+name = "const_format"
+version = "0.2.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c990efc7a285731f9a4378d81aff2f0e85a2c8781a05ef0f8baa8dac54d0ff48"
+dependencies = [
+ "const_format_proc_macros",
+]
+
+[[package]]
+name = "const_format_proc_macros"
+version = "0.2.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e026b6ce194a874cb9cf32cd5772d1ef9767cc8fcb5765948d74f37a9d8b2bf6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
 [[package]]
 name = "constant_tracking"
 version = "2.0.0"
diff --git a/circom/tests/loops/call_inside_loop.circom b/circom/tests/loops/call_inside_loop.circom
index c0bfc1822..58a92a265 100644
--- a/circom/tests/loops/call_inside_loop.circom
+++ b/circom/tests/loops/call_inside_loop.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 // XFAIL: .*
 
 function fun(a, n, b, c, d, e, f, g) {
diff --git a/circom/tests/loops/fib_input.circom b/circom/tests/loops/fib_input.circom
index 6b664f2e6..e821b45bb 100644
--- a/circom/tests/loops/fib_input.circom
+++ b/circom/tests/loops/fib_input.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template Fibonacci() {
     signal input nth_fib;
diff --git a/circom/tests/loops/fib_template.circom b/circom/tests/loops/fib_template.circom
index c3ee4e755..003e1fcf1 100644
--- a/circom/tests/loops/fib_template.circom
+++ b/circom/tests/loops/fib_template.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template FibonacciTmpl(N) {
     signal output out;
diff --git a/circom/tests/loops/for_known.circom b/circom/tests/loops/for_known.circom
index 77b7f14ae..acbecf2ed 100644
--- a/circom/tests/loops/for_known.circom
+++ b/circom/tests/loops/for_known.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ForKnown(N) {
     signal output out;
diff --git a/circom/tests/loops/for_unknown.circom b/circom/tests/loops/for_unknown.circom
index 888061553..b3d522043 100644
--- a/circom/tests/loops/for_unknown.circom
+++ b/circom/tests/loops/for_unknown.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ForUnknown() {
     signal input in;
diff --git a/circom/tests/loops/for_unknown_index.circom b/circom/tests/loops/for_unknown_index.circom
index 87bde3fbb..091cbd8d7 100644
--- a/circom/tests/loops/for_unknown_index.circom
+++ b/circom/tests/loops/for_unknown_index.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ForUnknownIndex() {
     signal input in;
diff --git a/circom/tests/loops/inner_loop_simple.circom b/circom/tests/loops/inner_loop_simple.circom
index a9c14299c..f3eb9c4e1 100644
--- a/circom/tests/loops/inner_loop_simple.circom
+++ b/circom/tests/loops/inner_loop_simple.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n, m) {
     signal input in[m];
@@ -17,8 +17,8 @@ template InnerLoops(n, m) {
 
 component main = InnerLoops(2, 3);
 
-//signal_arena = { out, in[0], in[1], in[2] }
-//lvars = { n, m, b[0], b[1], i, j }
+// %0 (i.e. signal arena) = { out, in[0], in[1], in[2] }
+// %lvars = { n, m, b[0], b[1], i, j }
 
 //CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run
 //CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
diff --git a/circom/tests/loops/inner_loops.circom b/circom/tests/loops/inner_loops.circom
index 7205dfb5f..4e22579fa 100644
--- a/circom/tests/loops/inner_loops.circom
+++ b/circom/tests/loops/inner_loops.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n) {
     signal input a[n];
@@ -15,8 +15,9 @@ template InnerLoops(n) {
 
 component main = InnerLoops(2);
 //
-//ARG = { a[0], a[1] }
-//lvars = { n, b[0], b[1], i, j }
+// %0 (i.e. signal arena) = { a[0], a[1] }
+// %lvars = { n, b[0], b[1], i, j }
+//
 //unrolled code:
 //	b[0] = b[0] + a[0 - 0 = 0];
 //	b[1] = b[1] + a[1 - 0 = 1];
diff --git a/circom/tests/loops/inner_loops2.circom b/circom/tests/loops/inner_loops2.circom
index fe405a82f..06aea4d52 100644
--- a/circom/tests/loops/inner_loops2.circom
+++ b/circom/tests/loops/inner_loops2.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n) {
     signal input a[n];
diff --git a/circom/tests/loops/inner_loops3.circom b/circom/tests/loops/inner_loops3.circom
index 9c193f329..509cac284 100644
--- a/circom/tests/loops/inner_loops3.circom
+++ b/circom/tests/loops/inner_loops3.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n) {
     signal input a[n];
diff --git a/circom/tests/loops/inner_loops4.circom b/circom/tests/loops/inner_loops4.circom
index a85aa859b..cdfa1d1ee 100644
--- a/circom/tests/loops/inner_loops4.circom
+++ b/circom/tests/loops/inner_loops4.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n) {
     signal input a[n];
diff --git a/circom/tests/loops/inner_loops5.circom b/circom/tests/loops/inner_loops5.circom
new file mode 100644
index 000000000..52129b7a1
--- /dev/null
+++ b/circom/tests/loops/inner_loops5.circom
@@ -0,0 +1,21 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+
+// %0 (i.e. signal arena)  = [ out, in ]
+// %lvars =  [ n, temp, i, j ]
+// %subcmps = []
+template Num2Bits(n) {
+    signal input in;
+    signal output out;
+
+	var temp = 0;
+    for (var i = 0; i < n; i++) {
+    	for (var j = 0; j < n; j++) {
+        	temp += (in >> j) & 1;
+        }
+    }
+}
+
+component main = Num2Bits(4);
diff --git a/circom/tests/loops/inner_loops6.circom b/circom/tests/loops/inner_loops6.circom
new file mode 100644
index 000000000..2f56e8d32
--- /dev/null
+++ b/circom/tests/loops/inner_loops6.circom
@@ -0,0 +1,69 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+
+// %0 (i.e. signal arena)  = [ out[0], ..., out[24], in ]
+// %lvars =  [ n, i, j ]
+// %subcmps = []
+template Num2Bits(n) {
+    signal input in;
+    signal output out[n*n];
+//     signal output out[n];
+    for (var i = 0; i < n; i++) {
+    	for (var j = 0; j < n; j++) {
+        	out[i*n + j] <-- (in >> j) & 1;
+        }
+//         out[i] <-- (in >> i) & 1;
+    }
+}
+
+component main = Num2Bits(5);
+
+//NOTE: For indexing dependent on the loop variable, need to compute pointer
+//	reference outside of the function call. All else can be done inside.
+//Q: What if there are more complex loop conditions? Or multiple iteration variables?
+// 
+//With array storage allocation:
+// template Num2Bits(arena[6]*) {
+// 	   lvars[4];
+// 	   subcmps[0];
+// 	
+//     lvars[1] = 0;
+//     lvars[2] = 1;
+//     for (var i = 0; i < lvars[0]; i++) {  // i == lvars[3]
+//         arena[i] <-- (arena[5] >> i) & 1;
+//         lvars[1] += arena[i] * lvars[2];
+//         lvars[2] = lvars[2] + lvars[2];
+//     }
+// 
+//     lvars[1] === arena[5];
+// }
+//
+//With loop body extracted:
+// function loop_body(arena[6]*, lvars[4]*, subcmps[0]*, i, arena_i*) {
+//     arena_i <-- (arena[5] >> i) & 1;
+//     lvars[1] += arena_i * lvars[2];
+//     lvars[2] = lvars[2] + lvars[2];
+// }
+// template Num2Bits(arena[6]*) {
+//     lvars[4];
+//     subcmps[0];
+// 	
+//     lvars[1] = 0;
+//     lvars[2] = 1;
+//     for (var i = 0; i < lvars[0]; i++) {  // i == lvars[3]
+//         loop_body(arena, &lvars, &subcmps, i, %arena[i]);
+//     }
+// 
+//     lvars[1] === arena[5];
+// }
+//
+
+
+
+
+
+
+
+
diff --git a/circom/tests/loops/known_function.circom b/circom/tests/loops/known_function.circom
index 65b837850..d88d7b790 100644
--- a/circom/tests/loops/known_function.circom
+++ b/circom/tests/loops/known_function.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 function funWithLoop(n) {
 	var acc = 0;
diff --git a/circom/tests/loops/known_signal_value.circom b/circom/tests/loops/known_signal_value.circom
index 6038f1785..881e15fa8 100644
--- a/circom/tests/loops/known_signal_value.circom
+++ b/circom/tests/loops/known_signal_value.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template accumulate() {
     signal input i;
diff --git a/circom/tests/loops/simple_variant_idx.circom b/circom/tests/loops/simple_variant_idx.circom
new file mode 100644
index 000000000..47a7a4bd4
--- /dev/null
+++ b/circom/tests/loops/simple_variant_idx.circom
@@ -0,0 +1,77 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template SimpleVariantIdx(n) {
+    signal input in;
+    signal output out[n];
+
+	var lc;
+    for (var i = 0; i < n; i++) {
+        out[i] <-- in;	//StoreBucket
+        lc = out[i];	//StoreBucket
+        //i++			//StoreBucket
+    }
+}
+
+component main = SimpleVariantIdx(3);
+
+//NOTE: For indexing dependent on the loop variable, need to compute pointer
+//	reference outside of the body function call. All else can be done inside.
+//
+// %0 (i.e. signal arena) = [ out[0], out[1], out[2], in ]
+// %lvars =  [ n, lc, i ]
+// %subcmps = []
+//
+// NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1) {{.*}} {
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY:
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %4, i256* %5, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY:
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %7, i256 1)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY:
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SimpleVariantIdx_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %6 = bitcast i256* %5 to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[FNUM]]([0 x i256]* %4, [0 x i256]* %0, [0 x i256]* %6, [0 x i256]* %8)
+//CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %11 = bitcast i256* %10 to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %13 = bitcast i256* %12 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[FNUM]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %11, [0 x i256]* %13)
+//CHECK-NEXT:   %14 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %18 = bitcast i256* %17 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[FNUM]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18)
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/unknown_index_from_array.circom b/circom/tests/loops/unknown_index_from_array.circom
new file mode 100644
index 000000000..a1f033191
--- /dev/null
+++ b/circom/tests/loops/unknown_index_from_array.circom
@@ -0,0 +1,26 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Example(n) {
+    signal input a[n];
+    signal input b[n];
+    signal output c[n];
+
+    for(var i = 0; i < n; i++) {
+        c[i] <-- a[b[2]];
+    }
+}
+
+component main = Example(3);
+
+// %0 (i.e. signal arena) = { c[0], c[1], c[2], a[0], a[1], a[2], b[0], b[1], b[2] }
+// %lvars = { n, i }
+//
+//CHECK-LABEL: define void @Example_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK-NOT: call void @..generated..loop.body.{{.*}}
+//
+//NOTE: Current implementation of loop body extraction does not move this loop body to
+//  a new function because the index of 'a' is unknown (i.e. loaded from signal 'b'). 
diff --git a/circom/tests/loops/unknown_index_from_function.circom b/circom/tests/loops/unknown_index_from_function.circom
new file mode 100644
index 000000000..7f45c66ef
--- /dev/null
+++ b/circom/tests/loops/unknown_index_from_function.circom
@@ -0,0 +1,35 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+function identity(n) {
+    return n;
+}
+
+template Example(n) {
+    signal input a[n];
+    signal input b;
+    signal output c[n];
+    
+    for(var i = 0; i < n; i++) {
+        c[i] <-- a[identity(b)];
+        //Circom AST splits this into 2 nodes:
+        //	CALL: lvars[2] = identity(b)
+        //	STORE: c[i] = a[lvars[2]]
+        //Then the loop variable increment is the 3rd statement
+        //	STORE: i = i + 1
+    }
+}
+
+component main = Example(3);
+
+// %0 (i.e. signal arena) = { c[0], c[1], c[2], a[0], a[1], a[2], b }
+// %lvars = { n, i, <identity_result> }
+//
+//CHECK-LABEL: define void @Example_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK-NOT: call void @..generated..loop.body.{{.*}}
+//
+//NOTE: Current implementation of loop body extraction does not move this loop body to
+//  a new function because the index of 'a' is unknown (i.e. function return value). 
diff --git a/circom/tests/loops/unknown_local_array_index.circom b/circom/tests/loops/unknown_local_array_index.circom
index 77b690851..2cdcf38be 100644
--- a/circom/tests/loops/unknown_local_array_index.circom
+++ b/circom/tests/loops/unknown_local_array_index.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ForUnknownIndex() {
     signal input in;
diff --git a/circom/tests/loops/unknown_loop_component.circom b/circom/tests/loops/unknown_loop_component.circom
index e20626b59..53ffd97af 100644
--- a/circom/tests/loops/unknown_loop_component.circom
+++ b/circom/tests/loops/unknown_loop_component.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template nbits() {
     signal input in;
diff --git a/circom/tests/loops/unknown_loop_index.circom b/circom/tests/loops/unknown_loop_index.circom
index b9656b80f..9b75d7e9d 100644
--- a/circom/tests/loops/unknown_loop_index.circom
+++ b/circom/tests/loops/unknown_loop_index.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template Num2Bits(n) {
     signal input in;
diff --git a/circom/tests/loops/unknown_loop_oob.circom b/circom/tests/loops/unknown_loop_oob.circom
index d167fb514..6b8c03676 100644
--- a/circom/tests/loops/unknown_loop_oob.circom
+++ b/circom/tests/loops/unknown_loop_oob.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template accumulate() {
     signal input i;
diff --git a/circom/tests/loops/vanguard-uc-comp.circom b/circom/tests/loops/vanguard-uc-comp.circom
index 25152ee2a..9ceb64176 100644
--- a/circom/tests/loops/vanguard-uc-comp.circom
+++ b/circom/tests/loops/vanguard-uc-comp.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template Num2Bits(n) {
     signal input in;
@@ -20,74 +20,108 @@ template Num2Bits(n) {
 
 component main = Num2Bits(2);
 
-// %arena (i.e. %0 param) = [ out[0], out[1], in ]
+// %0 (i.e. signal arena) = [ out[0], out[1], in ]
 // %lvars =  [ n, lc1, e2, i ]
 // %subcmps = []
 //
+// NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1, [0 x i256]* %fixed_2, [0 x i256]* %fixed_3) {{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$FNUM]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
+//CHECK-NEXT:   %call.fr_bit_and = call i256 @fr_bit_and(i256 %call.fr_shr, i256 1)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_bit_and, i256* %4, align 4
+//CHECK-NEXT:   br label %assert2
+//CHECK-EMPTY: 
+//CHECK-NEXT: assert2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 1)
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %6, i256 %call.fr_sub)
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %call.fr_mul, i256 0)
+//CHECK-NEXT:   call void @__assert(i1 %call.fr_eq)
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_value(i1 %call.fr_eq, i1* %constraint)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_mul1 = call i256 @fr_mul(i256 %12, i256 %14)
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 %call.fr_mul1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %17 = load i256, i256* %16, align 4
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %19 = load i256, i256* %18, align 4
+//CHECK-NEXT:   %call.fr_add2 = call i256 @fr_add(i256 %17, i256 %19)
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add2, i256* %20, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %22 = load i256, i256* %21, align 4
+//CHECK-NEXT:   %call.fr_add3 = call i256 @fr_add(i256 %22, i256 1)
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add3, i256* %23, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
 //CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run
 //CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//CHECK: unrolled_loop{{[0-9]+}}:
-//CHECK-NEXT:  %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
-//CHECK-NEXT:  %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:  %7 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:  %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:  %call.fr_shr = call i256 @fr_shr(i256 %6, i256 %8)
-//CHECK-NEXT:  %call.fr_bit_and = call i256 @fr_bit_and(i256 %call.fr_shr, i256 1)
-//CHECK-NEXT:  %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:  store i256 %call.fr_bit_and, i256* %9, align 4
-//CHECK-NEXT:  %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:  %11 = load i256, i256* %10, align 4
-//CHECK-NEXT:  %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:  %13 = load i256, i256* %12, align 4
-//CHECK-NEXT:  %call.fr_sub = call i256 @fr_sub(i256 %13, i256 1)
-//CHECK-NEXT:  %call.fr_mul = call i256 @fr_mul(i256 %11, i256 %call.fr_sub)
-//CHECK-NEXT:  %call.fr_eq = call i1 @fr_eq(i256 %call.fr_mul, i256 0)
-//CHECK-NEXT:  call void @__assert(i1 %call.fr_eq)
-//CHECK-NEXT:  %constraint = alloca i1, align 1
-//CHECK-NEXT:  call void @__constraint_value(i1 %call.fr_eq, i1* %constraint)
-//CHECK-NEXT:  %14 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:  %15 = load i256, i256* %14, align 4
-//CHECK-NEXT:  %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:  %17 = load i256, i256* %16, align 4
-//CHECK-NEXT:  %18 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:  %19 = load i256, i256* %18, align 4
-//CHECK-NEXT:  %call.fr_mul1 = call i256 @fr_mul(i256 %17, i256 %19)
-//CHECK-NEXT:  %call.fr_add = call i256 @fr_add(i256 %15, i256 %call.fr_mul1)
-//CHECK-NEXT:  %20 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:  store i256 %call.fr_add, i256* %20, align 4
-//CHECK-NEXT:  %21 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:  store i256 2, i256* %21, align 4
-//CHECK-NEXT:  %22 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:  store i256 1, i256* %22, align 4
-//CHECK-NEXT:  %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
-//CHECK-NEXT:  %24 = load i256, i256* %23, align 4
-//CHECK-NEXT:  %25 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:  %26 = load i256, i256* %25, align 4
-//CHECK-NEXT:  %call.fr_shr2 = call i256 @fr_shr(i256 %24, i256 %26)
-//CHECK-NEXT:  %call.fr_bit_and3 = call i256 @fr_bit_and(i256 %call.fr_shr2, i256 1)
-//CHECK-NEXT:  %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-//CHECK-NEXT:  store i256 %call.fr_bit_and3, i256* %27, align 4
-//CHECK-NEXT:  %28 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-//CHECK-NEXT:  %29 = load i256, i256* %28, align 4
-//CHECK-NEXT:  %30 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-//CHECK-NEXT:  %31 = load i256, i256* %30, align 4
-//CHECK-NEXT:  %call.fr_sub4 = call i256 @fr_sub(i256 %31, i256 1)
-//CHECK-NEXT:  %call.fr_mul5 = call i256 @fr_mul(i256 %29, i256 %call.fr_sub4)
-//CHECK-NEXT:  %call.fr_eq6 = call i1 @fr_eq(i256 %call.fr_mul5, i256 0)
-//CHECK-NEXT:  call void @__assert(i1 %call.fr_eq6)
-//CHECK-NEXT:  %constraint7 = alloca i1, align 1
-//CHECK-NEXT:  call void @__constraint_value(i1 %call.fr_eq6, i1* %constraint7)
-//CHECK-NEXT:  %32 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:  %33 = load i256, i256* %32, align 4
-//CHECK-NEXT:  %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-//CHECK-NEXT:  %35 = load i256, i256* %34, align 4
-//CHECK-NEXT:  %36 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:  %37 = load i256, i256* %36, align 4
-//CHECK-NEXT:  %call.fr_mul8 = call i256 @fr_mul(i256 %35, i256 %37)
-//CHECK-NEXT:  %call.fr_add9 = call i256 @fr_add(i256 %33, i256 %call.fr_mul8)
-//CHECK-NEXT:  %38 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:  store i256 %call.fr_add9, i256* %38, align 4
-//CHECK-NEXT:  %39 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:  store i256 4, i256* %39, align 4
-//CHECK-NEXT:  %40 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:  store i256 2, i256* %40, align 4
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %7 = bitcast i256* %6 to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %9 = bitcast i256* %8 to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %11 = bitcast i256* %10 to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %13 = bitcast i256* %12 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %5, [0 x i256]* %0, [0 x i256]* %7, [0 x i256]* %9, [0 x i256]* %11, [0 x i256]* %13)
+//CHECK-NEXT:   %14 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %18 = bitcast i256* %17 to [0 x i256]*
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %20 = bitcast i256* %19 to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18, [0 x i256]* %20, [0 x i256]* %22)
+//CHECK-NEXT:   br label %assert{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: assert{{[0-9]+}}:
+//CHECK-NEXT:   %23 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %24 = load i256, i256* %23, align 4
+//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %26 = load i256, i256* %25, align 4
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %24, i256 %26)
+//CHECK-NEXT:   call void @__assert(i1 %call.fr_eq)
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_value(i1 %call.fr_eq, i1* %constraint)
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/variant_idx_in_loop_A.circom b/circom/tests/loops/variant_idx_in_loop_A.circom
new file mode 100644
index 000000000..2c1b54afc
--- /dev/null
+++ b/circom/tests/loops/variant_idx_in_loop_A.circom
@@ -0,0 +1,55 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template VariantIndex(n) {
+    signal input in;
+    signal output out[n];
+
+    for (var i = 0; i<n; i++) {
+        out[i] <-- (in >> i);
+    }
+}
+
+component main = VariantIndex(2);
+
+// %0 (i.e. signal arena) = [ out[0], out[1], in ]
+// %lvars =  [ n, i ]
+// %subcmps = []
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0) {{.*}} {
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_shr, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @VariantIndex_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %3 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %5 = bitcast i256* %4 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %3, [0 x i256]* %0, [0 x i256]* %5)
+//CHECK-NEXT:   %6 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %6, [0 x i256]* %0, [0 x i256]* %8)
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/variant_idx_in_loop_B.circom b/circom/tests/loops/variant_idx_in_loop_B.circom
new file mode 100644
index 000000000..14cfaa453
--- /dev/null
+++ b/circom/tests/loops/variant_idx_in_loop_B.circom
@@ -0,0 +1,73 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template VariantIndex(n) {
+    signal input in;
+    signal output out;
+
+    var temp[n];
+    for (var i = 0; i<n; i++) {
+        temp[i] = (in >> i);
+    }
+    out <-- temp[0] + temp[1];
+}
+
+component main = VariantIndex(2);
+
+// %0 (i.e. signal arena) = [ out, in ]
+// %lvars =  [ n, temp[0], temp[1], i ]
+// %subcmps = []
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0) {{.*}} {
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_shr, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @VariantIndex_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %6, i32 0, i256 1
+//CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %5, [0 x i256]* %0, [0 x i256]* %8)
+//CHECK-NEXT:   %9 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 2
+//CHECK-NEXT:   %12 = bitcast i256* %11 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %12)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %13 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %15 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %16 = load i256, i256* %15, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %14, i256 %16)
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %17, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/variant_idx_in_loop_C.circom b/circom/tests/loops/variant_idx_in_loop_C.circom
new file mode 100644
index 000000000..f3313bc95
--- /dev/null
+++ b/circom/tests/loops/variant_idx_in_loop_C.circom
@@ -0,0 +1,29 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template VariantIndex(n) {
+    signal input in;
+    signal output out[n*n];
+
+    //Cannot move loop body to a new function. The index for 'out' is computed within
+    //  the loop body which means a pointer to out[x] obtained at the call site for
+    //  the new function and passed as a parameter would point to the wrong memory
+    //  location because it will use the old value of 'x'.
+    var x = 1;
+    for (var i = 0; i<n; i++) {
+        x = x + i;
+        out[x] <-- (in >> i);
+    }
+}
+
+component main = VariantIndex(2);
+
+// %0 (i.e. signal arena) = [ out[0], out[1], out[2], out[3], in ]
+// %lvars =  [ n, x, i ]
+// %subcmps = []
+//
+//CHECK-LABEL: define void @VariantIndex_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK-NOT: call void @..generated..loop.body.{{.*}}
diff --git a/circuit_passes/Cargo.toml b/circuit_passes/Cargo.toml
index e61d301f6..c2ce7ac77 100644
--- a/circuit_passes/Cargo.toml
+++ b/circuit_passes/Cargo.toml
@@ -11,4 +11,5 @@ compiler = {path = "../compiler"}
 program_structure = {path = "../program_structure"}
 code_producers = {path = "../code_producers"}
 intervallum = "1.4.0"
-circom_algebra = {path = "../circom_algebra"}
\ No newline at end of file
+circom_algebra = {path = "../circom_algebra"}
+const_format = "0.2.31"
diff --git a/circuit_passes/src/bucket_interpreter/env.rs b/circuit_passes/src/bucket_interpreter/env.rs
index 08de8cb02..f6915e82b 100644
--- a/circuit_passes/src/bucket_interpreter/env.rs
+++ b/circuit_passes/src/bucket_interpreter/env.rs
@@ -171,6 +171,20 @@ impl<'a> Env<'a> {
         copy
     }
 
+    pub fn set_all_to_unk(self) -> Self {
+        let mut copy = self;
+        for (_, v) in copy.vars.iter_mut() {
+            *v = Value::Unknown;
+        }
+        for (_, v) in copy.signals.iter_mut() {
+            *v = Value::Unknown;
+        }
+        for (_, v) in copy.subcmps.iter_mut() {
+            v.signals.clear();
+        }
+        copy
+    }
+
     /// Sets all the signals of the subcmp to UNK
     pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
         let mut copy = self;
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index c1c1b5ba5..5f8e37b7d 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -4,6 +4,7 @@ pub mod observer;
 pub(crate) mod operations;
 
 use circom_algebra::modular_arithmetic;
+use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
 use compiler::intermediate_representation::{Instruction, InstructionList, InstructionPointer};
 use compiler::intermediate_representation::ir_interface::*;
 use compiler::num_bigint::BigInt;
@@ -14,6 +15,7 @@ use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::{JoinSemiLattice, Value};
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
 use crate::passes::memory::PassMemory;
+use crate::passes::LOOP_BODY_FN_PREFIX;
 
 pub struct BucketInterpreter<'a> {
     pub(crate) observer: &'a dyn InterpreterObserver,
@@ -428,28 +430,38 @@ impl<'a> BucketInterpreter<'a> {
         env: Env<'env>,
         observe: bool,
     ) -> R<'env> {
-        let mut args = vec![];
         let mut env = env;
-        for i in &bucket.arguments {
-            let (value, new_env) = self.execute_instruction(i, env, observe);
-            env = new_env;
-            args.push(value.expect("Function argument must produce a value!"));
-        }
-        let result = if args.iter().any(|v| v.is_unknown()) {
+        let res = if bucket.symbol.starts_with(LOOP_BODY_FN_PREFIX) {
+            // The extracted loop body functions can change any values in the environment via the
+            //  parameters passed to them. For now, use the naive approach of setting everything to
+            //  Unknown. This could be improved with special handling for these types of functions.
+            env = env.set_all_to_unk();
+            Unknown
+        } else if bucket.symbol.eq(FR_IDENTITY_ARR_PTR) || bucket.symbol.eq(FR_INDEX_ARR_PTR) {
             Unknown
         } else {
-            env.run_function(&bucket.symbol, self, args, observe)
+            let mut args = vec![];
+            for i in &bucket.arguments {
+                let (value, new_env) = self.execute_instruction(i, env, observe);
+                env = new_env;
+                args.push(value.expect("Function argument must produce a value!"));
+            }
+            if args.iter().any(|v| v.is_unknown()) {
+                Unknown
+            } else {
+                env.run_function(&bucket.symbol, self, args, observe)
+            }
         };
 
-        // Write the result in the destination according to the address type
+        // Write the result in the destination according to the ReturnType
         match &bucket.return_info {
-            ReturnType::Intermediate { .. } => (Some(result), env),
+            ReturnType::Intermediate { .. } => (Some(res), env),
             ReturnType::Final(final_data) => (
                 None,
                 self.store_value_in_address(
                     &final_data.dest_address_type,
                     &final_data.dest,
-                    result,
+                    res,
                     env,
                     observe,
                 ),
diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
index 1c32872d3..ec3953a86 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll.rs
@@ -2,20 +2,19 @@ use std::cell::RefCell;
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::fmt::{Debug, Formatter};
 use std::vec;
-use code_producers::llvm_elements::stdlib::GENERATED_FN_PREFIX;
 use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
 use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use compiler::hir::very_concrete_program::Param;
 use compiler::intermediate_representation::{
-    BucketId, InstructionList, InstructionPointer, new_id, UpdateId, ToSExp,
+    BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
 };
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::CircuitTransformationPass;
+use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX};
 use crate::passes::memory::PassMemory;
 
 struct VariableValues<'a> {
@@ -477,7 +476,7 @@ impl LoopUnrollPass {
         );
         // Create new function to hold the copied body
         // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
-        let func_name = format!("{}loop.body.{}", GENERATED_FN_PREFIX, new_id());
+        let func_name = format!("{}{}", LOOP_BODY_FN_PREFIX, new_id());
         let new_func = Box::new(FunctionCodeInfo {
             source_file_id: bucket.source_file_id,
             line: bucket.line,
@@ -696,13 +695,15 @@ impl LoopUnrollPass {
                         args.push(Self::new_storage_ptr_ref(bucket, AddressType::Variable));
                         // Parameter for signals/arena
                         args.push(Self::new_storage_ptr_ref(bucket, AddressType::Signal));
-                        //Additional parameters for variant vector/array access within the loop
-                        for a in &iter_to_loc[&iter_num] {
-                            args.push(Self::new_indexed_storage_ptr_ref(
-                                bucket,
-                                a.0.clone(),
-                                a.1.get_u32(),
-                            ));
+                        // Additional parameters for variant vector/array access within the loop
+                        if !iter_to_loc.is_empty() {
+                            for a in &iter_to_loc[&iter_num] {
+                                args.push(Self::new_indexed_storage_ptr_ref(
+                                    bucket,
+                                    a.0.clone(),
+                                    a.1.get_u32(),
+                                ));
+                            }
                         }
                         block_body.push(
                             CallBucket {
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index 68f73d5de..a10f1e9bf 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -4,6 +4,7 @@ use compiler::circuit_design::template::{TemplateCode, TemplateCodeInfo};
 use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{Instruction, InstructionList, InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
+use code_producers::llvm_elements::stdlib::GENERATED_FN_PREFIX;
 use crate::passes::{
     conditional_flattening::ConditionalFlattening,
     deterministic_subcomponent_invocation::DeterministicSubCmpInvokePass,
@@ -21,6 +22,8 @@ mod unknown_index_sanitization;
 mod checks;
 pub(crate) mod memory;
 
+pub const LOOP_BODY_FN_PREFIX: &str = const_format::concatcp!(GENERATED_FN_PREFIX, "loop.body.");
+
 macro_rules! pre_hook {
     ($name: ident, $bucket_ty: ty) => {
         fn $name(&self, _bucket: &$bucket_ty) {}
diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index ee64c04df..f98b6249f 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -321,7 +321,7 @@ fn index_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     arr.set_name("arr");
     idx.set_name("idx");
 
-    let main = create_bb(producer, func, FR_IDENTITY_ARR_PTR);
+    let main = create_bb(producer, func, FR_INDEX_ARR_PTR);
     producer.set_current_bb(main);
     let gep =
         create_gep(producer, arr.into_pointer_value(), &[zero(producer), idx.into_int_value()]);

From 609d26195e07016eff8dcc3f0dd6164e3014f53b Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 7 Sep 2023 11:20:49 -0500
Subject: [PATCH 11/42] one more test fix

---
 circom/tests/loops/simple_variant_idx.circom | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/circom/tests/loops/simple_variant_idx.circom b/circom/tests/loops/simple_variant_idx.circom
index 47a7a4bd4..ac922722d 100644
--- a/circom/tests/loops/simple_variant_idx.circom
+++ b/circom/tests/loops/simple_variant_idx.circom
@@ -26,7 +26,7 @@ component main = SimpleVariantIdx(3);
 // NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1) {{.*}} {
+//CHECK-SAME: [[$FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1) {{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
@@ -61,17 +61,17 @@ component main = SimpleVariantIdx(3);
 //CHECK-NEXT:   %6 = bitcast i256* %5 to [0 x i256]*
 //CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[FNUM]]([0 x i256]* %4, [0 x i256]* %0, [0 x i256]* %6, [0 x i256]* %8)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %4, [0 x i256]* %0, [0 x i256]* %6, [0 x i256]* %8)
 //CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %11 = bitcast i256* %10 to [0 x i256]*
 //CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %13 = bitcast i256* %12 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[FNUM]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %11, [0 x i256]* %13)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %11, [0 x i256]* %13)
 //CHECK-NEXT:   %14 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
 //CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
 //CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
 //CHECK-NEXT:   %18 = bitcast i256* %17 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[FNUM]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18)
 //CHECK-NEXT:   br label %prologue

From 6da25a1062f78d84febbac5aba5e291b4aadd5e6 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 7 Sep 2023 13:47:15 -0500
Subject: [PATCH 12/42] fix loop_unroll unit test

---
 circuit_passes/src/passes/loop_unroll.rs | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
index ec3953a86..5762bd2e3 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll.rs
@@ -885,7 +885,7 @@ mod test {
         AddressType, Allocate, ComputeBucket, InstrContext, LoadBucket, LocationRule, LoopBucket,
         OperatorType, StoreBucket, ValueBucket, ValueType,
     };
-    use crate::passes::CircuitTransformationPass;
+    use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX};
     use crate::passes::loop_unroll::LoopUnrollPass;
 
     #[test]
@@ -902,7 +902,15 @@ mod test {
         }
         assert_ne!(circuit, new_circuit);
         match new_circuit.templates[0].body.last().unwrap().as_ref() {
-            Instruction::Block(b) => assert_eq!(b.body.len(), 10), // 5 iterations unrolled times 2 statements in the loop body
+            Instruction::Block(b) => {
+                // 5 iterations unrolled into 5 call statements targeting extracted loop body functions
+                assert_eq!(b.body.len(), 5);
+                assert!(b.body.iter().all(|s| if let Instruction::Call(c) = s.as_ref() {
+                    c.symbol.starts_with(LOOP_BODY_FN_PREFIX)
+                } else {
+                    false
+                }));
+            }
             _ => assert!(false),
         }
     }

From d6621d358d0d3de2d61a25d67ebfc348bba53f3b Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 7 Sep 2023 13:48:15 -0500
Subject: [PATCH 13/42] temporarily XFAIL subcomp and nested loop tests

---
 circom/tests/loops/inner_loop_simple.circom | 1 +
 circom/tests/loops/inner_loops.circom       | 1 +
 circom/tests/loops/inner_loops2.circom      | 1 +
 circom/tests/loops/inner_loops3.circom      | 1 +
 circom/tests/loops/inner_loops4.circom      | 1 +
 circom/tests/loops/inner_loops5.circom      | 1 +
 circom/tests/loops/inner_loops6.circom      | 2 +-
 circom/tests/subcmps/mapped.circom          | 1 +
 circom/tests/subcmps/mapped2.circom         | 1 +
 circom/tests/subcmps/mapped3.circom         | 1 +
 circom/tests/subcmps/mapped4.circom         | 1 +
 circom/tests/subcmps/subcmps1.circom        | 1 +
 circom/tests/subcmps/subcmps2.circom        | 1 +
 13 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/circom/tests/loops/inner_loop_simple.circom b/circom/tests/loops/inner_loop_simple.circom
index f3eb9c4e1..0ddcb67fb 100644
--- a/circom/tests/loops/inner_loop_simple.circom
+++ b/circom/tests/loops/inner_loop_simple.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*
 
 template InnerLoops(n, m) {
     signal input in[m];
diff --git a/circom/tests/loops/inner_loops.circom b/circom/tests/loops/inner_loops.circom
index 4e22579fa..845b1c3e3 100644
--- a/circom/tests/loops/inner_loops.circom
+++ b/circom/tests/loops/inner_loops.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*
 
 template InnerLoops(n) {
     signal input a[n];
diff --git a/circom/tests/loops/inner_loops2.circom b/circom/tests/loops/inner_loops2.circom
index 06aea4d52..a1dd22dd5 100644
--- a/circom/tests/loops/inner_loops2.circom
+++ b/circom/tests/loops/inner_loops2.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*
 
 template InnerLoops(n) {
     signal input a[n];
diff --git a/circom/tests/loops/inner_loops3.circom b/circom/tests/loops/inner_loops3.circom
index 509cac284..fbaf1906d 100644
--- a/circom/tests/loops/inner_loops3.circom
+++ b/circom/tests/loops/inner_loops3.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*
 
 template InnerLoops(n) {
     signal input a[n];
diff --git a/circom/tests/loops/inner_loops4.circom b/circom/tests/loops/inner_loops4.circom
index cdfa1d1ee..e51a9d7ee 100644
--- a/circom/tests/loops/inner_loops4.circom
+++ b/circom/tests/loops/inner_loops4.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*
 
 template InnerLoops(n) {
     signal input a[n];
diff --git a/circom/tests/loops/inner_loops5.circom b/circom/tests/loops/inner_loops5.circom
index 52129b7a1..732d5abcb 100644
--- a/circom/tests/loops/inner_loops5.circom
+++ b/circom/tests/loops/inner_loops5.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*
 
 
 // %0 (i.e. signal arena)  = [ out, in ]
diff --git a/circom/tests/loops/inner_loops6.circom b/circom/tests/loops/inner_loops6.circom
index 2f56e8d32..669f05c70 100644
--- a/circom/tests/loops/inner_loops6.circom
+++ b/circom/tests/loops/inner_loops6.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-
+// XFAIL:.*
 
 // %0 (i.e. signal arena)  = [ out[0], out[1], out[2], out[3], out[4], in ]
 // %lvars =  [ n, lc1, e2, i ]
diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index f98d272e3..3d673f411 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -2,6 +2,7 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// XFAIL:.*
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped2.circom b/circom/tests/subcmps/mapped2.circom
index 0299af906..bfc0b3869 100644
--- a/circom/tests/subcmps/mapped2.circom
+++ b/circom/tests/subcmps/mapped2.circom
@@ -2,6 +2,7 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// XFAIL:.*
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped3.circom b/circom/tests/subcmps/mapped3.circom
index 61b330457..0f1837a7b 100644
--- a/circom/tests/subcmps/mapped3.circom
+++ b/circom/tests/subcmps/mapped3.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// XFAIL:.*
 
 template ArrayOp(q) {
     signal input inp[15];
diff --git a/circom/tests/subcmps/mapped4.circom b/circom/tests/subcmps/mapped4.circom
index 279ca8834..903d10093 100644
--- a/circom/tests/subcmps/mapped4.circom
+++ b/circom/tests/subcmps/mapped4.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// XFAIL:.*
 
 template MatrixOp(q) {
     signal input inp[5][3];
diff --git a/circom/tests/subcmps/subcmps1.circom b/circom/tests/subcmps/subcmps1.circom
index b58b4bcf4..75d73fd60 100644
--- a/circom/tests/subcmps/subcmps1.circom
+++ b/circom/tests/subcmps/subcmps1.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// XFAIL:.*
 
 template IsZero() {
     signal input in;
diff --git a/circom/tests/subcmps/subcmps2.circom b/circom/tests/subcmps/subcmps2.circom
index 6f1668e7e..9dbffd486 100644
--- a/circom/tests/subcmps/subcmps2.circom
+++ b/circom/tests/subcmps/subcmps2.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.6;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// XFAIL:.*
 
 template Sum(n) {
     signal input inp[n];

From ad2ee2f7bf8e4b7927b97cdd0f336e5d32762a07 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Fri, 8 Sep 2023 09:30:29 -0500
Subject: [PATCH 14/42] fix/update some tests

---
 circom/tests/loops/inner_loop_simple.circom   | 168 +++++++-----
 circom/tests/loops/inner_loops2.circom        | 241 +++++++++++++++++-
 circom/tests/loops/inner_loops3.circom        | 216 +++++++++++++++-
 circom/tests/loops/simple_variant_idx.circom  |   8 +-
 circom/tests/loops/vanguard-uc-comp.circom    |   8 +-
 .../tests/loops/variant_idx_in_loop_A.circom  |   6 +-
 .../tests/loops/variant_idx_in_loop_B.circom  |   6 +-
 code_producers/src/llvm_elements/mod.rs       |   1 +
 8 files changed, 581 insertions(+), 73 deletions(-)

diff --git a/circom/tests/loops/inner_loop_simple.circom b/circom/tests/loops/inner_loop_simple.circom
index 0ddcb67fb..d06b4fefd 100644
--- a/circom/tests/loops/inner_loop_simple.circom
+++ b/circom/tests/loops/inner_loop_simple.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
 
 template InnerLoops(n, m) {
     signal input in[m];
@@ -20,59 +19,114 @@ component main = InnerLoops(2, 3);
 
 // %0 (i.e. signal arena) = { out, in[0], in[1], in[2] }
 // %lvars = { n, m, b[0], b[1], i, j }
-
-//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//CHECK: unrolled_loop{{[0-9]+}}:
-//CHECK-NEXT:   %[[T06:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[T06]], align 4
-//CHECK-NEXT:   %[[T07:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 1
-//CHECK-NEXT:   %[[T08:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T07]], align 4
-//CHECK-NEXT:   %[[T09:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T08]], i256* %{{.*}}[[T09]], align 4
-//CHECK-NEXT:   %[[T10:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T10]], align 4
-//CHECK-NEXT:   %[[T11:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 2
-//CHECK-NEXT:   %[[T12:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T11]], align 4
-//CHECK-NEXT:   %[[T13:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T12]], i256* %{{.*}}[[T13]], align 4
-//CHECK-NEXT:   %[[T14:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T14]], align 4
-//CHECK-NEXT:   %[[T15:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 3
-//CHECK-NEXT:   %[[T16:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T15]], align 4
-//CHECK-NEXT:   %[[T17:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T16]], i256* %{{.*}}[[T17]], align 4
-//CHECK-NEXT:   %[[T18:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 3, i256* %{{.*}}[[T18]], align 4
-//CHECK-NEXT:   %[[T19:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T19]], align 4
-//CHECK-NEXT:   %[[T20:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[T20]], align 4
-//CHECK-NEXT:   %[[T21:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 1
-//CHECK-NEXT:   %[[T22:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T21]], align 4
-//CHECK-NEXT:   %[[T23:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 %{{.*}}[[T22]], i256* %{{.*}}[[T23]], align 4
-//CHECK-NEXT:   %[[T24:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T24]], align 4
-//CHECK-NEXT:   %[[T25:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 2
-//CHECK-NEXT:   %[[T26:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T25]], align 4
-//CHECK-NEXT:   %[[T27:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 %{{.*}}[[T26]], i256* %{{.*}}[[T27]], align 4
-//CHECK-NEXT:   %[[T28:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T28]], align 4
-//CHECK-NEXT:   %[[T29:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 3
-//CHECK-NEXT:   %[[T30:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T29]], align 4
-//CHECK-NEXT:   %[[T31:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 %{{.*}}[[T30]], i256* %{{.*}}[[T31]], align 4
-//CHECK-NEXT:   %[[T32:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 3, i256* %{{.*}}[[T32]], align 4
-//CHECK-NEXT:   %[[T33:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T33]], align 4
-//CHECK-NEXT:   br label %store[[LBL:[0-9]+]]
-//CHECK-EMPTY:
-//CHECK-NEXT: store{{.*}}[[LBL]]:
-//CHECK-NEXT:   %[[T34:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %[[T35:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T34]], align 4
-//CHECK-NEXT:   %[[T36:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 0
-//CHECK-NEXT:   store i256 %{{.*}}[[T35]], i256* %{{.*}}[[T36]], align 4
-//CHECK:   }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 2
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %3, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 2
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %3, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %6 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   %7 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %9 = bitcast i256* %8 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, [0 x i256]* %9)
+//CHECK-NEXT:   %10 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %12 = bitcast i256* %11 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0, [0 x i256]* %12)
+//CHECK-NEXT:   %13 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %15 = bitcast i256* %14 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %13, [0 x i256]* %0, [0 x i256]* %15)
+//CHECK-NEXT:   %16 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %17 = load i256, i256* %16, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %17, i256 1)
+//CHECK-NEXT:   %18 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %18, align 4
+//CHECK-NEXT:   %19 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %19, align 4
+//CHECK-NEXT:   %20 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %20, [0 x i256]* %0, [0 x i256]* %22)
+//CHECK-NEXT:   %23 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %25 = bitcast i256* %24 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %23, [0 x i256]* %0, [0 x i256]* %25)
+//CHECK-NEXT:   %26 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %28 = bitcast i256* %27 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %26, [0 x i256]* %0, [0 x i256]* %28)
+//CHECK-NEXT:   %29 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %30 = load i256, i256* %29, align 4
+//CHECK-NEXT:   %call.fr_add23 = call i256 @fr_add(i256 %30, i256 1)
+//CHECK-NEXT:   %31 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add23, i256* %31, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %32 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %33 = load i256, i256* %32, align 4
+//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %33, i256* %34, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops2.circom b/circom/tests/loops/inner_loops2.circom
index a1dd22dd5..07e98ca23 100644
--- a/circom/tests/loops/inner_loops2.circom
+++ b/circom/tests/loops/inner_loops2.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
 
 template InnerLoops(n) {
     signal input a[n];
@@ -34,3 +33,243 @@ template InnerLoops(n) {
 }
 
 component main = InnerLoops(5);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+// 
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+// 
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_3]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_4]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_5]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %9 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %10 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 1, i256* %10, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %11 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %11, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %12 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %14 = bitcast i256* %13 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, [0 x i256]* %14)
+//CHECK-NEXT:   %15 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %17 = bitcast i256* %16 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0, [0 x i256]* %17)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %18 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 2, i256* %18, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %19 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %19, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %20 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %20, [0 x i256]* %0, [0 x i256]* %22)
+//CHECK-NEXT:   %23 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %25 = bitcast i256* %24 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %23, [0 x i256]* %0, [0 x i256]* %25)
+//CHECK-NEXT:   %26 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %28 = bitcast i256* %27 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %26, [0 x i256]* %0, [0 x i256]* %28)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %29 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 3, i256* %29, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %30 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %30, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %31 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %33 = bitcast i256* %32 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %31, [0 x i256]* %0, [0 x i256]* %33)
+//CHECK-NEXT:   %34 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %36 = bitcast i256* %35 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %34, [0 x i256]* %0, [0 x i256]* %36)
+//CHECK-NEXT:   %37 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %39 = bitcast i256* %38 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %37, [0 x i256]* %0, [0 x i256]* %39)
+//CHECK-NEXT:   %40 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %42 = bitcast i256* %41 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %40, [0 x i256]* %0, [0 x i256]* %42)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %43 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 4, i256* %43, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %44 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %44, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %45 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %47 = bitcast i256* %46 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %45, [0 x i256]* %0, [0 x i256]* %47)
+//CHECK-NEXT:   %48 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %49 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %50 = bitcast i256* %49 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %48, [0 x i256]* %0, [0 x i256]* %50)
+//CHECK-NEXT:   %51 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %52 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %53 = bitcast i256* %52 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %51, [0 x i256]* %0, [0 x i256]* %53)
+//CHECK-NEXT:   %54 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %56 = bitcast i256* %55 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %54, [0 x i256]* %0, [0 x i256]* %56)
+//CHECK-NEXT:   %57 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %58 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %59 = bitcast i256* %58 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %57, [0 x i256]* %0, [0 x i256]* %59)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %60 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 5, i256* %60, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops3.circom b/circom/tests/loops/inner_loops3.circom
index fbaf1906d..a3fcfcaa9 100644
--- a/circom/tests/loops/inner_loops3.circom
+++ b/circom/tests/loops/inner_loops3.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
 
 template InnerLoops(n) {
     signal input a[n];
@@ -25,3 +24,218 @@ template InnerLoops(n) {
 }
 
 component main = InnerLoops(5);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_3]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_4]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_5]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %8 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %9 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %9, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT:  unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %12 = bitcast i256* %11 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, [0 x i256]* %12)
+//CHECK-NEXT:   %13 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %15 = bitcast i256* %14 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %13, [0 x i256]* %0, [0 x i256]* %15)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %16 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %16, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %17 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %19 = bitcast i256* %18 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %17, [0 x i256]* %0, [0 x i256]* %19)
+//CHECK-NEXT:   %20 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %20, [0 x i256]* %0, [0 x i256]* %22)
+//CHECK-NEXT:   %23 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %25 = bitcast i256* %24 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %23, [0 x i256]* %0, [0 x i256]* %25)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %26 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %26, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %27 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %29 = bitcast i256* %28 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %27, [0 x i256]* %0, [0 x i256]* %29)
+//CHECK-NEXT:   %30 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %32 = bitcast i256* %31 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %30, [0 x i256]* %0, [0 x i256]* %32)
+//CHECK-NEXT:   %33 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %35 = bitcast i256* %34 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %33, [0 x i256]* %0, [0 x i256]* %35)
+//CHECK-NEXT:   %36 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %38 = bitcast i256* %37 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %36, [0 x i256]* %0, [0 x i256]* %38)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %39 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %39, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %40 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %42 = bitcast i256* %41 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %40, [0 x i256]* %0, [0 x i256]* %42)
+//CHECK-NEXT:   %43 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %45 = bitcast i256* %44 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %43, [0 x i256]* %0, [0 x i256]* %45)
+//CHECK-NEXT:   %46 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %47 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %48 = bitcast i256* %47 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %46, [0 x i256]* %0, [0 x i256]* %48)
+//CHECK-NEXT:   %49 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %50 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %51 = bitcast i256* %50 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %49, [0 x i256]* %0, [0 x i256]* %51)
+//CHECK-NEXT:   %52 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %53 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %54 = bitcast i256* %53 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %52, [0 x i256]* %0, [0 x i256]* %54)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/simple_variant_idx.circom b/circom/tests/loops/simple_variant_idx.circom
index ac922722d..d272890ec 100644
--- a/circom/tests/loops/simple_variant_idx.circom
+++ b/circom/tests/loops/simple_variant_idx.circom
@@ -26,7 +26,7 @@ component main = SimpleVariantIdx(3);
 // NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1) {{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
@@ -61,17 +61,17 @@ component main = SimpleVariantIdx(3);
 //CHECK-NEXT:   %6 = bitcast i256* %5 to [0 x i256]*
 //CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %4, [0 x i256]* %0, [0 x i256]* %6, [0 x i256]* %8)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %4, [0 x i256]* %0, [0 x i256]* %6, [0 x i256]* %8)
 //CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %11 = bitcast i256* %10 to [0 x i256]*
 //CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %13 = bitcast i256* %12 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %11, [0 x i256]* %13)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %11, [0 x i256]* %13)
 //CHECK-NEXT:   %14 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
 //CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
 //CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
 //CHECK-NEXT:   %18 = bitcast i256* %17 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18)
 //CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/vanguard-uc-comp.circom b/circom/tests/loops/vanguard-uc-comp.circom
index 9ceb64176..dfdfdb517 100644
--- a/circom/tests/loops/vanguard-uc-comp.circom
+++ b/circom/tests/loops/vanguard-uc-comp.circom
@@ -27,8 +27,8 @@ component main = Num2Bits(2);
 // NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1, [0 x i256]* %fixed_2, [0 x i256]* %fixed_3) {{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$FNUM]]:
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1, [0 x i256]* %fixed_2, [0 x i256]* %fixed_3){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
@@ -102,7 +102,7 @@ component main = Num2Bits(2);
 //CHECK-NEXT:   %11 = bitcast i256* %10 to [0 x i256]*
 //CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //CHECK-NEXT:   %13 = bitcast i256* %12 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %5, [0 x i256]* %0, [0 x i256]* %7, [0 x i256]* %9, [0 x i256]* %11, [0 x i256]* %13)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, [0 x i256]* %7, [0 x i256]* %9, [0 x i256]* %11, [0 x i256]* %13)
 //CHECK-NEXT:   %14 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
@@ -112,7 +112,7 @@ component main = Num2Bits(2);
 //CHECK-NEXT:   %20 = bitcast i256* %19 to [0 x i256]*
 //CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18, [0 x i256]* %20, [0 x i256]* %22)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18, [0 x i256]* %20, [0 x i256]* %22)
 //CHECK-NEXT:   br label %assert{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: assert{{[0-9]+}}:
diff --git a/circom/tests/loops/variant_idx_in_loop_A.circom b/circom/tests/loops/variant_idx_in_loop_A.circom
index 2c1b54afc..f196bf25b 100644
--- a/circom/tests/loops/variant_idx_in_loop_A.circom
+++ b/circom/tests/loops/variant_idx_in_loop_A.circom
@@ -18,7 +18,7 @@ component main = VariantIndex(2);
 // %subcmps = []
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0) {{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 2
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
@@ -47,9 +47,9 @@ component main = VariantIndex(2);
 //CHECK-NEXT:   %3 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //CHECK-NEXT:   %5 = bitcast i256* %4 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %3, [0 x i256]* %0, [0 x i256]* %5)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %3, [0 x i256]* %0, [0 x i256]* %5)
 //CHECK-NEXT:   %6 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %6, [0 x i256]* %0, [0 x i256]* %8)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %6, [0 x i256]* %0, [0 x i256]* %8)
 //CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/variant_idx_in_loop_B.circom b/circom/tests/loops/variant_idx_in_loop_B.circom
index 14cfaa453..e6020c39a 100644
--- a/circom/tests/loops/variant_idx_in_loop_B.circom
+++ b/circom/tests/loops/variant_idx_in_loop_B.circom
@@ -20,7 +20,7 @@ component main = VariantIndex(2);
 // %subcmps = []
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$FNUM:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0) {{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
@@ -50,12 +50,12 @@ component main = VariantIndex(2);
 //CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %6, i32 0, i256 1
 //CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %5, [0 x i256]* %0, [0 x i256]* %8)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, [0 x i256]* %8)
 //CHECK-NEXT:   %9 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %10 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 2
 //CHECK-NEXT:   %12 = bitcast i256* %11 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$FNUM]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %12)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %12)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
diff --git a/code_producers/src/llvm_elements/mod.rs b/code_producers/src/llvm_elements/mod.rs
index e7beb0cb8..544e49b0b 100644
--- a/code_producers/src/llvm_elements/mod.rs
+++ b/code_producers/src/llvm_elements/mod.rs
@@ -317,6 +317,7 @@ impl<'a> LLVM<'a> {
     }
 
     pub fn write_to_file(&self, path: &str) -> Result<(), ()> {
+        // Run LLVM IR inliner for the FR_IDENTITY_ARR_PTR and FR_INDEX_ARR_PTR functions
         let pm = PassManager::create(());
         pm.add_always_inliner_pass();
         pm.run_on(&self.module);

From 0d93f52c4e46186383bc85630388dff24c3961df Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 11 Sep 2023 08:54:54 -0500
Subject: [PATCH 15/42] add bool to dis/enable loop body extraction

---
 circuit_passes/src/passes/loop_unroll.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
index 5762bd2e3..e28dd50bb 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll.rs
@@ -17,6 +17,7 @@ use crate::bucket_interpreter::value::Value;
 use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX};
 use crate::passes::memory::PassMemory;
 
+const EXTRACT_LOOP_BODY_TO_NEW_FUNC: bool = true;
 struct VariableValues<'a> {
     pub env_at_header: Env<'a>,
     pub loadstore_to_index: HashMap<BucketId, (AddressType, Value)>, // key is load/store bucket ID
@@ -668,7 +669,7 @@ impl LoopUnrollPass {
         // println!("recorder = {:?}", recorder); //TODO: TEMP
 
         let mut block_body = vec![];
-        if recorder.is_safe_to_move() {
+        if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() {
             // If the loop body contains more than one instruction, extract it into a new
             // function and generate 'recorder.get_iter()' number of calls to that function.
             // Otherwise, just duplicate the body 'recorder.get_iter()' number of times.

From 60de11a89e6c60152541c5c8d8d7629ca2082862 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 11 Sep 2023 15:15:44 -0500
Subject: [PATCH 16/42] update/add more tests

---
 circom/tests/loops/init_nonzero.circom |  38 ++++++
 circom/tests/loops/inner_loops5.circom |  71 ++++++++++-
 circom/tests/loops/inner_loops6.circom | 156 ++++++++++++++++---------
 3 files changed, 211 insertions(+), 54 deletions(-)
 create mode 100644 circom/tests/loops/init_nonzero.circom

diff --git a/circom/tests/loops/init_nonzero.circom b/circom/tests/loops/init_nonzero.circom
new file mode 100644
index 000000000..9ac1520a6
--- /dev/null
+++ b/circom/tests/loops/init_nonzero.circom
@@ -0,0 +1,38 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// Ensure that non-zero initialization of for-loop iteration variable is handled properly.
+template NonZeroInit() {
+    signal input a[9];
+    signal output b[9];
+
+    for (var i = 4; i < 7; i++) {
+        b[i] <-- a[i];
+    }
+    for (var i = 7; i < 9; i++) {
+        b[i] <-- a[i];
+    }
+    for (var i = 0; i < 4; i++) {
+        b[i] <-- a[i];
+    }
+}
+
+component main = NonZeroInit();
+
+//CHECK-LABEL: define void @NonZeroInit_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %[[VAR1:[0-9]+]] = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %{{.*}}[[VAR1]], align 4
+//CHECK-NEXT:   br label %{{.*}}
+//
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %[[VAR2:[0-9]+]] = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 7, i256* %{{.*}}[[VAR2]], align 4
+//CHECK-NEXT:   br label %{{.*}}
+//
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %[[VAR3:[0-9]+]] = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[VAR3]], align 4
+//CHECK-NEXT:   br label %{{.*}}
diff --git a/circom/tests/loops/inner_loops5.circom b/circom/tests/loops/inner_loops5.circom
index 732d5abcb..843b855d4 100644
--- a/circom/tests/loops/inner_loops5.circom
+++ b/circom/tests/loops/inner_loops5.circom
@@ -1,8 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
-
 
 // %0 (i.e. signal arena)  = [ out, in ]
 // %lvars =  [ n, temp, i, j ]
@@ -20,3 +18,72 @@ template Num2Bits(n) {
 }
 
 component main = Num2Bits(4);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %0, align 4
+//CHECK-NEXT:   br label %loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop{{[0-9]+}}:
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %2, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %loop.body, label %loop.end
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %6, i256 %8)
+//CHECK-NEXT:   %call.fr_bit_and = call i256 @fr_bit_and(i256 %call.fr_shr, i256 1)
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 %call.fr_bit_and)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %11, i256 1)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %12, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add2 = call i256 @fr_add(i256 %14, i256 1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add2, i256* %15, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %4 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops6.circom b/circom/tests/loops/inner_loops6.circom
index 669f05c70..f44f6aa7e 100644
--- a/circom/tests/loops/inner_loops6.circom
+++ b/circom/tests/loops/inner_loops6.circom
@@ -1,69 +1,121 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
 
-// %0 (i.e. signal arena)  = [ out[0], out[1], out[2], out[3], out[4], in ]
-// %lvars =  [ n, lc1, e2, i ]
+// %0 (i.e. signal arena)  = [ out[0], out[1], out[2], out[3], in ]
+// %lvars =  [ n, i, j ]
 // %subcmps = []
 template Num2Bits(n) {
     signal input in;
     signal output out[n*n];
-//     signal output out[n];
+
     for (var i = 0; i < n; i++) {
     	for (var j = 0; j < n; j++) {
-        	out[i*n + j] <-- (in >> j) & 1;
+            // NOTE: When processing the outer loop, the following statement is determined NOT
+            //  safe to move into a new function since it uses 'j' which is unknown. That results
+            //  in the outer loop unrolling without extracting the body to a new function. Then
+            //  the two copies of the inner loop are processed and their bodies are extracted to
+            //  new functions and replaced with calls to those functions before unrolling.
+            //  This result is logically correct but not optimal because the 2 extracted body
+            //  functions are identical.
+        	out[i*n + j] <-- in;
         }
-//         out[i] <-- (in >> i) & 1;
     }
 }
 
-component main = Num2Bits(5);
-
-//NOTE: For indexing dependent on the loop variable, need to compute pointer
-//	reference outside of the function call. All else can be done inside.
-//Q: What if there are more complex loop conditions? Or multiple iteration variables?
-// 
-//With array storage allocation:
-// template Num2Bits(arena[6]*) {
-// 	   lvars[4];
-// 	   subcmps[0];
-// 	
-//     lvars[1] = 0;
-//     lvars[2] = 1;
-//     for (var i = 0; i < lvars[0]; i++) {  // i == lvars[3]
-//         arena[i] <-- (arena[5] >> i) & 1;
-//         lvars[1] += arena[i] * lvars[2];
-//         lvars[2] = lvars[2] + lvars[2];
-//     }
-// 
-//     lvars[1] === arena[5];
-// }
+component main = Num2Bits(2);
 //
-//With loop body extracted:
-// function loop_body(arena[6]*, lvars[4]*, subcmps[0]*, i, arena_i*) {
-//     arena_i <-- (arena[5] >> i) & 1;
-//     lvars[1] += arena_i * lvars[2];
-//     lvars[2] = lvars[2] + lvars[2];
-// }
-// template Num2Bits(arena[6]*) {
-//     lvars[4];
-//     subcmps[0];
-// 	
-//     lvars[1] = 0;
-//     lvars[2] = 1;
-//     for (var i = 0; i < lvars[0]; i++) {  // i == lvars[3]
-//         loop_body(arena, &lvars, &subcmps, i, %arena[i]);
-//     }
+// %0 (i.e. signal arena) = { out[0], out[1], out[2], out[3], in }
+// %lvars = { n, i, j }
+//
+//unrolled code:
+//	out[0] = in;
+//	out[1] = in;
+//	out[2] = in;
+//	out[3] = in;
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
 // 
-//     lvars[1] === arena[5];
-// }
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
 //
-
-
-
-
-
-
-
-
+//CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %6 = bitcast i256* %5 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, [0 x i256]* %6)
+//CHECK-NEXT:   %7 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %9 = bitcast i256* %8 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, [0 x i256]* %9)
+//CHECK-NEXT:   %10 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
+//CHECK-NEXT:   %12 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
+//CHECK-NEXT:   %13 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %13, align 4
+//CHECK-NEXT:   %14 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16)
+//CHECK-NEXT:   %17 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %19 = bitcast i256* %18 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %17, [0 x i256]* %0, [0 x i256]* %19)
+//CHECK-NEXT:   %20 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %21 = load i256, i256* %20, align 4
+//CHECK-NEXT:   %call.fr_add15 = call i256 @fr_add(i256 %21, i256 1)
+//CHECK-NEXT:   %22 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add15, i256* %22, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }

From 8a1b98386c98ab206ed70c19864e8fa87cd8038c Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 11 Sep 2023 22:38:34 -0500
Subject: [PATCH 17/42] refactor loop unroll into multiple files

---
 circuit_passes/src/passes/loop_unroll.rs      | 1206 -----------------
 .../src/passes/loop_unroll/body_extractor.rs  |  259 ++++
 .../loop_unroll/extracted_location_updater.rs |  189 +++
 .../passes/loop_unroll/loop_env_recorder.rs   |  216 +++
 circuit_passes/src/passes/loop_unroll/mod.rs  |  587 ++++++++
 5 files changed, 1251 insertions(+), 1206 deletions(-)
 delete mode 100644 circuit_passes/src/passes/loop_unroll.rs
 create mode 100644 circuit_passes/src/passes/loop_unroll/body_extractor.rs
 create mode 100644 circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
 create mode 100644 circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
 create mode 100644 circuit_passes/src/passes/loop_unroll/mod.rs

diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll.rs
deleted file mode 100644
index e28dd50bb..000000000
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ /dev/null
@@ -1,1206 +0,0 @@
-use std::cell::RefCell;
-use std::collections::{BTreeMap, HashMap, HashSet};
-use std::fmt::{Debug, Formatter};
-use std::vec;
-use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
-use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
-use compiler::circuit_design::template::TemplateCode;
-use compiler::compiler_interface::Circuit;
-use compiler::hir::very_concrete_program::Param;
-use compiler::intermediate_representation::{
-    BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
-};
-use compiler::intermediate_representation::ir_interface::*;
-use crate::bucket_interpreter::env::Env;
-use crate::bucket_interpreter::observer::InterpreterObserver;
-use crate::bucket_interpreter::value::Value;
-use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX};
-use crate::passes::memory::PassMemory;
-
-const EXTRACT_LOOP_BODY_TO_NEW_FUNC: bool = true;
-struct VariableValues<'a> {
-    pub env_at_header: Env<'a>,
-    pub loadstore_to_index: HashMap<BucketId, (AddressType, Value)>, // key is load/store bucket ID
-}
-
-impl<'a> VariableValues<'a> {
-    pub fn new(env_at_header: Env<'a>) -> Self {
-        VariableValues { env_at_header, loadstore_to_index: Default::default() }
-    }
-}
-
-impl Debug for VariableValues<'_> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        // write!(
-        //     f,
-        //     "\n{{\n env_at_header = {}\n loadstore_to_index = {:?}\n}}",
-        //     self.env_at_header, self.loadstore_to_index
-        // )
-        write!(f, "\n  loadstore_to_index = {:?}\n", self.loadstore_to_index)
-    }
-}
-
-struct EnvRecorder<'a> {
-    mem: &'a PassMemory,
-    // NOTE: RefCell is needed here because the instance of this struct is borrowed by
-    //  the main interpreter while we also need to mutate these internal structures.
-    vals_per_iteration: RefCell<HashMap<usize, VariableValues<'a>>>, // key is iteration number
-    current_iter_num: RefCell<usize>,
-    safe_to_move: RefCell<bool>,
-}
-
-impl Debug for EnvRecorder<'_> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "\n current_iter_num = {}\n safe_to_move = {:?}\n vals_per_iteration = {:?}",
-            self.current_iter_num.borrow(),
-            self.safe_to_move.borrow(),
-            self.vals_per_iteration.borrow(),
-        )
-    }
-}
-
-impl<'a> EnvRecorder<'a> {
-    pub fn new(mem: &'a PassMemory) -> Self {
-        EnvRecorder {
-            mem,
-            vals_per_iteration: Default::default(),
-            current_iter_num: RefCell::new(0),
-            safe_to_move: RefCell::new(true),
-        }
-    }
-
-    pub fn is_safe_to_move(&self) -> bool {
-        *self.safe_to_move.borrow()
-    }
-
-    pub fn increment_iter(&self) {
-        *self.current_iter_num.borrow_mut() += 1;
-    }
-
-    pub fn get_iter(&self) -> usize {
-        *self.current_iter_num.borrow()
-    }
-
-    pub fn record_env_at_header(&self, env: Env<'a>) {
-        let iter = self.get_iter();
-        assert!(!self.vals_per_iteration.borrow().contains_key(&iter));
-        self.vals_per_iteration.borrow_mut().insert(iter, VariableValues::new(env));
-    }
-
-    pub fn get_header_env_clone(&self) -> Env {
-        let iter = self.get_iter();
-        assert!(self.vals_per_iteration.borrow().contains_key(&iter));
-        self.vals_per_iteration.borrow().get(&iter).unwrap().env_at_header.clone()
-    }
-
-    fn record_memloc_at_bucket(&self, bucket_id: &BucketId, addr_ty: AddressType, val: Value) {
-        let iter = self.get_iter();
-        assert!(self.vals_per_iteration.borrow().contains_key(&iter));
-        self.vals_per_iteration
-            .borrow_mut()
-            .get_mut(&iter)
-            .unwrap()
-            .loadstore_to_index
-            .insert(*bucket_id, (addr_ty, val));
-    }
-
-    fn compute_index(&self, loc: &LocationRule, env: &Env) -> Value {
-        match loc {
-            LocationRule::Mapped { .. } => {
-                todo!(); //not sure if/how to handle that
-            }
-            LocationRule::Indexed { location, .. } => {
-                // Evaluate the index using the current environment and using the environment from the
-                //  loop header. If either is Unknown or they do not give the same value, then it is
-                //  not safe to move the loop body to another function because the index computation may
-                //  not give the same result when done at the call site, outside of the new function.
-                let interp = self.mem.build_interpreter(self);
-                let (idx_loc, _) = interp.execute_instruction(location, env.clone(), false);
-                // println!("--   LOC: var/sig[{:?}]", idx_loc); //TODO: TEMP
-                if let Some(idx_loc) = idx_loc {
-                    let (idx_header, _) =
-                        interp.execute_instruction(location, self.get_header_env_clone(), false);
-                    if let Some(idx_header) = idx_header {
-                        if Value::eq(&idx_header, &idx_loc) {
-                            return idx_loc;
-                        }
-                    }
-                }
-                Value::Unknown
-            }
-        }
-    }
-
-    fn check(&self, bucket_id: &BucketId, addr_ty: &AddressType, loc: &LocationRule, env: &Env) {
-        let val_result = self.compute_index(loc, env);
-        if val_result == Value::Unknown {
-            self.safe_to_move.replace(false);
-        }
-        //NOTE: must record even when Unknown to ensure that Unknown
-        //  value is not confused with missing values for an iteration
-        //  that can be caused by conditionals within the loop.
-        self.record_memloc_at_bucket(bucket_id, addr_ty.clone(), val_result);
-    }
-}
-
-impl InterpreterObserver for EnvRecorder<'_> {
-    fn on_load_bucket(&self, bucket: &LoadBucket, env: &Env) -> bool {
-        if let Some(_) = bucket.bounded_fn {
-            todo!(); //not sure if/how to handle that
-        }
-        self.check(&bucket.id, &bucket.address_type, &bucket.src, env);
-        true
-    }
-
-    fn on_store_bucket(&self, bucket: &StoreBucket, env: &Env) -> bool {
-        if let Some(_) = bucket.bounded_fn {
-            todo!(); //not sure if/how to handle that
-        }
-        self.check(&bucket.id, &bucket.dest_address_type, &bucket.dest, env);
-        true
-    }
-
-    fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_compute_bucket(&self, _bucket: &ComputeBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_assert_bucket(&self, _bucket: &AssertBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_loop_bucket(&self, _bucket: &LoopBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_create_cmp_bucket(&self, _bucket: &CreateCmpBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_constraint_bucket(&self, _bucket: &ConstraintBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_block_bucket(&self, _bucket: &BlockBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_nop_bucket(&self, _bucket: &NopBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_location_rule(&self, _location_rule: &LocationRule, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_call_bucket(&self, _bucket: &CallBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_branch_bucket(&self, _bucket: &BranchBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_return_bucket(&self, _bucket: &ReturnBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn on_log_bucket(&self, _bucket: &LogBucket, _env: &Env) -> bool {
-        self.is_safe_to_move() //continue observing unless something unsafe has been found
-    }
-
-    fn ignore_function_calls(&self) -> bool {
-        true
-    }
-
-    fn ignore_subcmp_calls(&self) -> bool {
-        true
-    }
-}
-
-pub struct LoopUnrollPass {
-    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: PassMemory,
-    replacements: RefCell<BTreeMap<BucketId, InstructionPointer>>,
-    new_body_functions: RefCell<Vec<FunctionCode>>,
-}
-
-impl LoopUnrollPass {
-    pub fn new(prime: &String) -> Self {
-        LoopUnrollPass {
-            memory: PassMemory::new(prime, String::from(""), Default::default()),
-            replacements: Default::default(),
-            new_body_functions: Default::default(),
-        }
-    }
-
-    fn check_load_bucket(
-        &self,
-        bucket: &mut LoadBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        if let Some(x) = bucket_arg_order.remove(&bucket.id) {
-            // Update the destination information to reference the argument
-            //NOTE: This can't use AddressType::Variable or AddressType::Signal
-            //  because ExtractedFunctionLLVMIRProducer references the first two
-            //  parameters with those. So this has to use SubcmpSignal (it should
-            //  work fine because subcomps will also just be additional params).
-            bucket.address_type = AddressType::SubcmpSignal {
-                cmp_address: Self::new_u32_value(bucket, x),
-                uniform_parallel_value: None,
-                is_output: false,
-                input_information: InputInformation::NoInput,
-            };
-            bucket.src = LocationRule::Indexed {
-                location: Self::new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
-                template_header: None,
-            };
-        } else {
-            // If not replacing, check deeper in the AddressType and LocationRule
-            self.check_address_type(&mut bucket.address_type, bucket_arg_order);
-            self.check_location_rule(&mut bucket.src, bucket_arg_order);
-        }
-    }
-
-    fn check_store_bucket(
-        &self,
-        bucket: &mut StoreBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        // Check the source/RHS of the store in either case
-        self.check_instruction(&mut bucket.src, bucket_arg_order);
-        //
-        if let Some(x) = bucket_arg_order.remove(&bucket.id) {
-            // Update the destination information to reference the argument
-            bucket.dest_address_type = AddressType::SubcmpSignal {
-                cmp_address: Self::new_u32_value(bucket, x),
-                uniform_parallel_value: None,
-                is_output: false,
-                input_information: InputInformation::NoInput,
-            };
-            bucket.dest = LocationRule::Indexed {
-                location: Self::new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
-                template_header: None,
-            };
-        } else {
-            // If not replacing, check deeper in the AddressType and LocationRule
-            self.check_address_type(&mut bucket.dest_address_type, bucket_arg_order);
-            self.check_location_rule(&mut bucket.dest, bucket_arg_order);
-        }
-    }
-
-    fn check_location_rule(
-        &self,
-        location_rule: &mut LocationRule,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        match location_rule {
-            LocationRule::Indexed { location, .. } => {
-                self.check_instruction(location, bucket_arg_order);
-            }
-            LocationRule::Mapped { .. } => unreachable!(),
-        }
-    }
-
-    fn check_address_type(
-        &self,
-        addr_type: &mut AddressType,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        if let AddressType::SubcmpSignal { cmp_address, .. } = addr_type {
-            self.check_instruction(cmp_address, bucket_arg_order);
-        }
-    }
-
-    fn check_compute_bucket(
-        &self,
-        bucket: &mut ComputeBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        for i in &mut bucket.stack {
-            self.check_instruction(i, bucket_arg_order);
-        }
-    }
-
-    fn check_assert_bucket(
-        &self,
-        bucket: &mut AssertBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        self.check_instruction(&mut bucket.evaluate, bucket_arg_order);
-    }
-
-    fn check_loop_bucket(
-        &self,
-        bucket: &mut LoopBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        todo!()
-    }
-
-    fn check_create_cmp_bucket(
-        &self,
-        bucket: &mut CreateCmpBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        todo!()
-    }
-
-    fn check_constraint_bucket(
-        &self,
-        bucket: &mut ConstraintBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        self.check_instruction(
-            match bucket {
-                ConstraintBucket::Substitution(i) => i,
-                ConstraintBucket::Equality(i) => i,
-            },
-            bucket_arg_order,
-        );
-    }
-
-    fn check_block_bucket(
-        &self,
-        bucket: &mut BlockBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        todo!()
-    }
-
-    fn check_call_bucket(
-        &self,
-        bucket: &mut CallBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        todo!()
-    }
-
-    fn check_branch_bucket(
-        &self,
-        bucket: &mut BranchBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        todo!()
-    }
-
-    fn check_return_bucket(
-        &self,
-        bucket: &mut ReturnBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        self.check_instruction(&mut bucket.value, bucket_arg_order);
-    }
-
-    fn check_log_bucket(
-        &self,
-        bucket: &mut LogBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        for arg in &mut bucket.argsprint {
-            if let LogBucketArg::LogExp(i) = arg {
-                self.check_instruction(i, bucket_arg_order);
-            }
-        }
-    }
-
-    //Nothing to do
-    fn check_value_bucket(&self, _: &mut ValueBucket, _: &mut BTreeMap<BucketId, usize>) {}
-    fn check_nop_bucket(&self, _: &mut NopBucket, _: &mut BTreeMap<BucketId, usize>) {}
-
-    fn check_instruction(
-        &self,
-        inst: &mut InstructionPointer,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) {
-        match inst.as_mut() {
-            Instruction::Value(ref mut b) => self.check_value_bucket(b, bucket_arg_order),
-            Instruction::Load(ref mut b) => self.check_load_bucket(b, bucket_arg_order),
-            Instruction::Store(ref mut b) => self.check_store_bucket(b, bucket_arg_order),
-            Instruction::Compute(ref mut b) => self.check_compute_bucket(b, bucket_arg_order),
-            Instruction::Call(ref mut b) => self.check_call_bucket(b, bucket_arg_order),
-            Instruction::Branch(ref mut b) => self.check_branch_bucket(b, bucket_arg_order),
-            Instruction::Return(ref mut b) => self.check_return_bucket(b, bucket_arg_order),
-            Instruction::Assert(ref mut b) => self.check_assert_bucket(b, bucket_arg_order),
-            Instruction::Log(ref mut b) => self.check_log_bucket(b, bucket_arg_order),
-            Instruction::Loop(ref mut b) => self.check_loop_bucket(b, bucket_arg_order),
-            Instruction::CreateCmp(ref mut b) => self.check_create_cmp_bucket(b, bucket_arg_order),
-            Instruction::Constraint(ref mut b) => self.check_constraint_bucket(b, bucket_arg_order),
-            Instruction::Block(ref mut b) => self.check_block_bucket(b, bucket_arg_order),
-            Instruction::Nop(ref mut b) => self.check_nop_bucket(b, bucket_arg_order),
-        }
-    }
-
-    fn extract_body(
-        &self,
-        bucket: &LoopBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
-    ) -> String {
-        // NOTE: must create parameter list before 'bucket_arg_order' is modified
-        let mut params = vec![
-            Param { name: String::from("lvars"), length: vec![0] },
-            Param { name: String::from("signals"), length: vec![0] },
-        ];
-        for i in 0..bucket_arg_order.len() {
-            params.push(Param { name: format!("fixed_{}", i), length: vec![0] });
-        }
-
-        // Copy loop body and add a "return void" at the end
-        let mut new_body = vec![];
-        for s in &bucket.body {
-            let mut copy: InstructionPointer = s.clone();
-            if !bucket_arg_order.is_empty() {
-                //Traverse each cloned statement before calling `update_id()` and replace the
-                //  old location reference with reference to the proper argument. Mappings are
-                //  removed as they are processed so no change is needed once the map is empty.
-                self.check_instruction(&mut copy, bucket_arg_order);
-            }
-            copy.update_id();
-            new_body.push(copy);
-        }
-        assert!(bucket_arg_order.is_empty());
-        new_body.push(
-            ReturnBucket {
-                id: new_id(),
-                source_file_id: bucket.source_file_id,
-                line: bucket.line,
-                message_id: bucket.message_id,
-                with_size: usize::MAX, // size > 1 will produce "return void" LLVM instruction
-                value: NopBucket { id: new_id() }.allocate(),
-            }
-            .allocate(),
-        );
-        // Create new function to hold the copied body
-        // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
-        let func_name = format!("{}{}", LOOP_BODY_FN_PREFIX, new_id());
-        let new_func = Box::new(FunctionCodeInfo {
-            source_file_id: bucket.source_file_id,
-            line: bucket.line,
-            name: func_name.clone(),
-            header: func_name.clone(),
-            body: new_body,
-            params,
-            returns: vec![], // void return type on the function
-            ..FunctionCodeInfo::default()
-        });
-        // Store the function to be transformed and added to circuit later
-        self.new_body_functions.borrow_mut().push(new_func);
-        func_name
-    }
-
-    fn new_u32_value(bucket: &dyn ObtainMeta, val: usize) -> InstructionPointer {
-        ValueBucket {
-            id: new_id(),
-            source_file_id: bucket.get_source_file_id().clone(),
-            line: bucket.get_line(),
-            message_id: bucket.get_message_id(),
-            parse_as: ValueType::U32,
-            op_aux_no: 0,
-            value: val,
-        }
-        .allocate()
-    }
-    fn new_custom_fn_load_bucket(
-        bucket: &dyn ObtainMeta,
-        load_fun: &str,
-        addr_type: AddressType,
-        location: InstructionPointer,
-    ) -> InstructionPointer {
-        LoadBucket {
-            id: new_id(),
-            source_file_id: bucket.get_source_file_id().clone(),
-            line: bucket.get_line(),
-            message_id: bucket.get_message_id(),
-            address_type: addr_type,
-            src: LocationRule::Indexed { location, template_header: None },
-            bounded_fn: Some(String::from(load_fun)),
-        }
-        .allocate()
-    }
-
-    fn new_storage_ptr_ref(bucket: &dyn ObtainMeta, addr_type: AddressType) -> InstructionPointer {
-        Self::new_custom_fn_load_bucket(
-            bucket,
-            FR_IDENTITY_ARR_PTR,
-            addr_type,
-            Self::new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
-        )
-    }
-
-    //NOTE: When the 'bounded_fn' for LoadBucket is Some(_), the index parameter
-    //  is ignored so we must instead use `FR_INDEX_ARR_PTR` to apply the index.
-    //  Uses of that function can be inlined later.
-    // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
-    fn new_indexed_storage_ptr_ref(
-        bucket: &dyn ObtainMeta,
-        addr_type: AddressType,
-        index: usize,
-    ) -> InstructionPointer {
-        CallBucket {
-            id: new_id(),
-            source_file_id: bucket.get_source_file_id().clone(),
-            line: bucket.get_line(),
-            message_id: bucket.get_message_id(),
-            symbol: String::from(FR_INDEX_ARR_PTR),
-            return_info: ReturnType::Intermediate { op_aux_no: 0 },
-            arena_size: 0, // size 0 indicates arguments should not be placed into an arena
-            argument_types: vec![], // LLVM IR generation doesn't use this field
-            arguments: vec![
-                Self::new_storage_ptr_ref(bucket, addr_type),
-                Self::new_u32_value(bucket, index),
-            ],
-        }
-        .allocate()
-    }
-
-    fn is_all_same(data: &[usize]) -> bool {
-        data.iter()
-            .fold((true, None), {
-                |acc, elem| {
-                    if acc.1.is_some() {
-                        (acc.0 && (acc.1.unwrap() == elem), Some(elem))
-                    } else {
-                        (true, Some(elem))
-                    }
-                }
-            })
-            .0
-    }
-
-    //return value key is iteration number
-    fn compute_extra_args(
-        recorder: &EnvRecorder,
-    ) -> (HashMap<usize, Vec<(AddressType, Value)>>, BTreeMap<BucketId, usize>) {
-        let mut iter_to_loc: HashMap<usize, Vec<(AddressType, Value)>> = HashMap::default();
-        let mut bucket_arg_order = BTreeMap::new();
-        let vpi = recorder.vals_per_iteration.borrow();
-        let all_loadstore_bucket_ids: HashSet<&BucketId> =
-            vpi.values().flat_map(|x| x.loadstore_to_index.keys()).collect();
-        // println!("all_loadstore_bucket_ids = {:?}", all_loadstore_bucket_ids);
-        for id in all_loadstore_bucket_ids {
-            // Check if the computed index value is the same across all iterations for this BucketId.
-            //  If it is not the same in all iterations, then it needs to be passed as a separate
-            //  parameter to the new function.
-            // NOTE: Some iterations of the loop may have no mapping for certain BucketIds because
-            //  conditional branches can make certain buckets unused in some iterations. Just ignore
-            //  those cases where there is no value for a certain iteration and check among those
-            //  iterations that have a value. This is the reason it was important to store Unknown
-            //  values in the `loadstore_to_index` index as well, so they are not confused with
-            //  missing values.
-            let mut next_iter_to_store = 0;
-            let mut prev_val = None;
-            for curr_iter in 0..recorder.get_iter() {
-                let curr_val = vpi[&curr_iter].loadstore_to_index.get(id);
-                if curr_val.is_some() {
-                    if prev_val.is_none() {
-                        //initial state
-                        prev_val = curr_val;
-                    } else {
-                        assert!(prev_val.is_some() && curr_val.is_some());
-                        let prev_val_pair = prev_val.unwrap();
-                        let curr_val_pair = curr_val.unwrap();
-                        assert_eq!(prev_val_pair.0, curr_val_pair.0); //AddressType always matches
-                        if !Value::eq(&prev_val_pair.1, &curr_val_pair.1) {
-                            assert!(!prev_val_pair.1.is_unknown() && !curr_val_pair.1.is_unknown());
-                            // Store current Value for current iteration
-                            iter_to_loc.entry(curr_iter).or_default().push(curr_val_pair.clone());
-                            // Store previous Value for all iterations that did have the same
-                            //  value (or None) and have not yet been stored.
-                            for j in next_iter_to_store..curr_iter {
-                                iter_to_loc.entry(j).or_default().push(prev_val_pair.clone());
-                            }
-                            // Update for next iteration
-                            next_iter_to_store = curr_iter + 1;
-                            prev_val = curr_val;
-                        }
-                    }
-                }
-            }
-            //ASSERT: All vectors have the same length at the end of each iteration
-            assert!(Self::is_all_same(&iter_to_loc.values().map(|x| x.len()).collect::<Vec<_>>()));
-            //ASSERT: Value was added for every iteration or for no iterations
-            assert!(next_iter_to_store == 0 || next_iter_to_store == recorder.get_iter());
-            //
-            if next_iter_to_store != 0 {
-                bucket_arg_order.insert(id.clone(), bucket_arg_order.len());
-            }
-        }
-        (iter_to_loc, bucket_arg_order)
-    }
-
-    fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
-        // {
-        //     println!("\nTry unrolling loop {}:", bucket.id); //TODO: TEMP
-        //     for (i, s) in bucket.body.iter().enumerate() {
-        //         println!("[{}/{}]{}", i + 1, bucket.body.len(), s.to_sexp().to_pretty(100));
-        //     }
-        //     for (i, s) in bucket.body.iter().enumerate() {
-        //         println!("[{}/{}]{:?}", i + 1, bucket.body.len(), s);
-        //     }
-        //     println!("LOOP ENTRY env {}", env); //TODO: TEMP
-        // }
-        // Compute loop iteration count. If unknown, return immediately.
-        let recorder = EnvRecorder::new(&self.memory);
-        {
-            //TODO: This has the wrong scope if an inner function w/ fixed params will be processed! Need test case for it.
-            //  Can't make it crash. Maybe it's not activating in current setup, it was only when I tried to process the other functions?
-            let interpreter = self.memory.build_interpreter(&recorder);
-            let mut inner_env = env.clone();
-            loop {
-                recorder.record_env_at_header(inner_env.clone());
-                let (_, cond, new_env) =
-                    interpreter.execute_loop_bucket_once(bucket, inner_env, true);
-                match cond {
-                    // If the conditional becomes unknown just give up.
-                    None => return (None, 0),
-                    // When conditional becomes `false`, iteration count is complete.
-                    Some(false) => break,
-                    // Otherwise, continue counting.
-                    Some(true) => recorder.increment_iter(),
-                };
-                inner_env = new_env;
-            }
-        }
-        // println!("recorder = {:?}", recorder); //TODO: TEMP
-
-        let mut block_body = vec![];
-        if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() {
-            // If the loop body contains more than one instruction, extract it into a new
-            // function and generate 'recorder.get_iter()' number of calls to that function.
-            // Otherwise, just duplicate the body 'recorder.get_iter()' number of times.
-            match &bucket.body[..] {
-                [a] => {
-                    for _ in 0..recorder.get_iter() {
-                        let mut copy = a.clone();
-                        copy.update_id();
-                        block_body.push(copy);
-                    }
-                }
-                b => {
-                    assert!(b.len() > 1);
-                    let (iter_to_loc, mut bucket_arg_order) = Self::compute_extra_args(&recorder);
-                    let name = self.extract_body(bucket, &mut bucket_arg_order);
-                    for iter_num in 0..recorder.get_iter() {
-                        // NOTE: CallBucket arguments must use a LoadBucket to reference the necessary pointers
-                        //  within the current body. However, it doesn't actually need to generate a load
-                        //  instruction to use these pointers as parameters to the function so we must use the
-                        //  `bounded_fn` field of the LoadBucket to specify the identity function to perform
-                        //  the "loading" (but really it just returns the pointer that was passed in).
-                        let mut args = InstructionList::default();
-                        // Parameter for local vars
-                        args.push(Self::new_storage_ptr_ref(bucket, AddressType::Variable));
-                        // Parameter for signals/arena
-                        args.push(Self::new_storage_ptr_ref(bucket, AddressType::Signal));
-                        // Additional parameters for variant vector/array access within the loop
-                        if !iter_to_loc.is_empty() {
-                            for a in &iter_to_loc[&iter_num] {
-                                args.push(Self::new_indexed_storage_ptr_ref(
-                                    bucket,
-                                    a.0.clone(),
-                                    a.1.get_u32(),
-                                ));
-                            }
-                        }
-                        block_body.push(
-                            CallBucket {
-                                id: new_id(),
-                                source_file_id: bucket.source_file_id,
-                                line: bucket.line,
-                                message_id: bucket.message_id,
-                                symbol: name.clone(),
-                                return_info: ReturnType::Intermediate { op_aux_no: 0 },
-                                arena_size: 0, // size 0 indicates arguments should not be placed into an arena
-                                argument_types: vec![], // LLVM IR generation doesn't use this field
-                                arguments: args,
-                            }
-                            .allocate(),
-                        );
-                    }
-                }
-            }
-        } else {
-            //If the loop body is not safe to move into a new function, just unroll.
-            for _ in 0..recorder.get_iter() {
-                for s in &bucket.body {
-                    let mut copy = s.clone();
-                    copy.update_id();
-                    block_body.push(copy);
-                }
-            }
-        }
-        (Some(block_body), recorder.get_iter())
-    }
-
-    // Will take the unrolled loop and interpretate it
-    // checking if new loop buckets appear
-    fn continue_inside(&self, bucket: &BlockBucket, env: &Env) {
-        let interpreter = self.memory.build_interpreter(self);
-        interpreter.execute_block_bucket(bucket, env.clone(), true);
-    }
-}
-
-impl InterpreterObserver for LoopUnrollPass {
-    fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_load_bucket(&self, _bucket: &LoadBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_store_bucket(&self, _bucket: &StoreBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_compute_bucket(&self, _bucket: &ComputeBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_assert_bucket(&self, _bucket: &AssertBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_loop_bucket(&self, bucket: &LoopBucket, env: &Env) -> bool {
-        if let (Some(block_body), n_iters) = self.try_unroll_loop(bucket, env) {
-            let block = BlockBucket {
-                id: new_id(),
-                source_file_id: bucket.source_file_id,
-                line: bucket.line,
-                message_id: bucket.message_id,
-                body: block_body,
-                n_iters,
-            };
-            self.continue_inside(&block, env);
-            self.replacements.borrow_mut().insert(bucket.id, block.allocate());
-        }
-        false
-    }
-
-    fn on_create_cmp_bucket(&self, _bucket: &CreateCmpBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_constraint_bucket(&self, _bucket: &ConstraintBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_block_bucket(&self, _bucket: &BlockBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_nop_bucket(&self, _bucket: &NopBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_location_rule(&self, _location_rule: &LocationRule, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_call_bucket(&self, _bucket: &CallBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_branch_bucket(&self, _bucket: &BranchBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_return_bucket(&self, _bucket: &ReturnBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn on_log_bucket(&self, _bucket: &LogBucket, _env: &Env) -> bool {
-        true
-    }
-
-    fn ignore_function_calls(&self) -> bool {
-        true
-    }
-
-    fn ignore_subcmp_calls(&self) -> bool {
-        true
-    }
-}
-
-impl CircuitTransformationPass for LoopUnrollPass {
-    fn name(&self) -> &str {
-        "LoopUnrollPass"
-    }
-
-    fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.fill_from_circuit(circuit);
-    }
-
-    fn post_hook_circuit(&self, cir: &mut Circuit) {
-        // Normalize return type on source functions for "WriteLLVMIR for Circuit"
-        //  which treats a 1-D vector of size 1 as a scalar return and an empty
-        //  vector as "void" return type (the initial Circuit builder uses empty
-        //  for scalar returns because it doesn't consider "void" return possible).
-        for f in &mut cir.functions {
-            if f.returns.is_empty() {
-                f.returns = vec![1];
-            }
-        }
-        // Transform and add the new body functions
-        for f in self.new_body_functions.borrow().iter() {
-            cir.functions.push(self.transform_function(&f));
-        }
-    }
-
-    fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.set_scope(template);
-        self.memory.run_template(self, template);
-    }
-
-    fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.get_field_constants_clone()
-    }
-
-    fn transform_loop_bucket(&self, bucket: &LoopBucket) -> InstructionPointer {
-        if let Some(unrolled_loop) = self.replacements.borrow().get(&bucket.id) {
-            return self.transform_instruction(unrolled_loop);
-        }
-        LoopBucket {
-            id: new_id(),
-            source_file_id: bucket.source_file_id,
-            line: bucket.line,
-            message_id: bucket.message_id,
-            continue_condition: self.transform_instruction(&bucket.continue_condition),
-            body: self.transform_instructions(&bucket.body),
-        }
-        .allocate()
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use std::collections::HashMap;
-    use compiler::circuit_design::template::TemplateCodeInfo;
-    use compiler::compiler_interface::Circuit;
-    use compiler::intermediate_representation::{Instruction, new_id};
-    use compiler::intermediate_representation::ir_interface::{
-        AddressType, Allocate, ComputeBucket, InstrContext, LoadBucket, LocationRule, LoopBucket,
-        OperatorType, StoreBucket, ValueBucket, ValueType,
-    };
-    use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX};
-    use crate::passes::loop_unroll::LoopUnrollPass;
-
-    #[test]
-    fn test_loop_unrolling() {
-        let prime = "goldilocks".to_string();
-        let pass = LoopUnrollPass::new(&prime);
-        let mut circuit = example_program();
-        circuit.llvm_data.variable_index_mapping.insert("test_0".to_string(), HashMap::new());
-        circuit.llvm_data.signal_index_mapping.insert("test_0".to_string(), HashMap::new());
-        circuit.llvm_data.component_index_mapping.insert("test_0".to_string(), HashMap::new());
-        let new_circuit = pass.transform_circuit(&circuit);
-        if cfg!(debug_assertions) {
-            println!("{}", new_circuit.templates[0].body.last().unwrap().to_string());
-        }
-        assert_ne!(circuit, new_circuit);
-        match new_circuit.templates[0].body.last().unwrap().as_ref() {
-            Instruction::Block(b) => {
-                // 5 iterations unrolled into 5 call statements targeting extracted loop body functions
-                assert_eq!(b.body.len(), 5);
-                assert!(b.body.iter().all(|s| if let Instruction::Call(c) = s.as_ref() {
-                    c.symbol.starts_with(LOOP_BODY_FN_PREFIX)
-                } else {
-                    false
-                }));
-            }
-            _ => assert!(false),
-        }
-    }
-
-    fn example_program() -> Circuit {
-        Circuit {
-            wasm_producer: Default::default(),
-            c_producer: Default::default(),
-            llvm_data: Default::default(),
-            templates: vec![Box::new(TemplateCodeInfo {
-                id: 0,
-                source_file_id: None,
-                line: 0,
-                header: "test_0".to_string(),
-                name: "test".to_string(),
-                is_parallel: false,
-                is_parallel_component: false,
-                is_not_parallel_component: false,
-                has_parallel_sub_cmp: false,
-                number_of_inputs: 0,
-                number_of_outputs: 0,
-                number_of_intermediates: 0,
-                body: vec![
-                    // (store 0 0)
-                    StoreBucket {
-                        id: new_id(),
-                        source_file_id: None,
-                        line: 0,
-                        message_id: 0,
-                        context: InstrContext { size: 0 },
-                        dest_is_output: false,
-                        dest_address_type: AddressType::Variable,
-                        dest: LocationRule::Indexed {
-                            location: ValueBucket {
-                                id: new_id(),
-                                source_file_id: None,
-                                line: 0,
-                                message_id: 0,
-                                parse_as: ValueType::U32,
-                                op_aux_no: 0,
-                                value: 0,
-                            }
-                            .allocate(),
-                            template_header: Some("test_0".to_string()),
-                        },
-                        src: ValueBucket {
-                            id: new_id(),
-                            source_file_id: None,
-                            line: 0,
-                            message_id: 0,
-                            parse_as: ValueType::U32,
-                            op_aux_no: 0,
-                            value: 0,
-                        }
-                        .allocate(),
-                        bounded_fn: None,
-                    }
-                    .allocate(),
-                    // (store 1 0)
-                    StoreBucket {
-                        id: new_id(),
-                        source_file_id: None,
-                        line: 0,
-                        message_id: 0,
-                        context: InstrContext { size: 0 },
-                        dest_is_output: false,
-                        dest_address_type: AddressType::Variable,
-                        dest: LocationRule::Indexed {
-                            location: ValueBucket {
-                                id: new_id(),
-                                source_file_id: None,
-                                line: 0,
-                                message_id: 0,
-                                parse_as: ValueType::U32,
-                                op_aux_no: 0,
-                                value: 1,
-                            }
-                            .allocate(),
-                            template_header: Some("test_0".to_string()),
-                        },
-                        src: ValueBucket {
-                            id: new_id(),
-                            source_file_id: None,
-                            line: 0,
-                            message_id: 0,
-                            parse_as: ValueType::U32,
-                            op_aux_no: 0,
-                            value: 0,
-                        }
-                        .allocate(),
-                        bounded_fn: None,
-                    }
-                    .allocate(),
-                    // (loop (compute le (load 1) 5) (
-                    LoopBucket {
-                        id: new_id(),
-                        source_file_id: None,
-                        line: 0,
-                        message_id: 0,
-                        continue_condition: ComputeBucket {
-                            id: new_id(),
-                            source_file_id: None,
-                            line: 0,
-                            message_id: 0,
-                            op: OperatorType::Lesser,
-                            op_aux_no: 0,
-                            stack: vec![
-                                LoadBucket {
-                                    id: new_id(),
-                                    source_file_id: None,
-                                    line: 0,
-                                    message_id: 0,
-                                    address_type: AddressType::Variable,
-                                    src: LocationRule::Indexed {
-                                        location: ValueBucket {
-                                            id: new_id(),
-                                            source_file_id: None,
-                                            line: 0,
-                                            message_id: 0,
-                                            parse_as: ValueType::U32,
-                                            op_aux_no: 0,
-                                            value: 1,
-                                        }
-                                        .allocate(),
-                                        template_header: Some("test_0".to_string()),
-                                    },
-                                    bounded_fn: None,
-                                }
-                                .allocate(),
-                                ValueBucket {
-                                    id: new_id(),
-                                    source_file_id: None,
-                                    line: 0,
-                                    message_id: 0,
-                                    parse_as: ValueType::U32,
-                                    op_aux_no: 0,
-                                    value: 5,
-                                }
-                                .allocate(),
-                            ],
-                        }
-                        .allocate(),
-                        body: vec![
-                            //   (store 0 (compute add (load 0) 2))
-                            StoreBucket {
-                                id: new_id(),
-                                source_file_id: None,
-                                line: 0,
-                                message_id: 0,
-                                context: InstrContext { size: 0 },
-                                dest_is_output: false,
-                                dest_address_type: AddressType::Variable,
-                                dest: LocationRule::Indexed {
-                                    location: ValueBucket {
-                                        id: new_id(),
-                                        source_file_id: None,
-                                        line: 0,
-                                        message_id: 0,
-                                        parse_as: ValueType::U32,
-                                        op_aux_no: 0,
-                                        value: 0,
-                                    }
-                                    .allocate(),
-                                    template_header: None,
-                                },
-                                src: ComputeBucket {
-                                    id: new_id(),
-                                    source_file_id: None,
-                                    line: 0,
-                                    message_id: 0,
-                                    op: OperatorType::Add,
-                                    op_aux_no: 0,
-                                    stack: vec![
-                                        LoadBucket {
-                                            id: new_id(),
-                                            source_file_id: None,
-                                            line: 0,
-                                            message_id: 0,
-                                            address_type: AddressType::Variable,
-                                            src: LocationRule::Indexed {
-                                                location: ValueBucket {
-                                                    id: new_id(),
-                                                    source_file_id: None,
-                                                    line: 0,
-                                                    message_id: 0,
-                                                    parse_as: ValueType::U32,
-                                                    op_aux_no: 0,
-                                                    value: 0,
-                                                }
-                                                .allocate(),
-                                                template_header: Some("test_0".to_string()),
-                                            },
-                                            bounded_fn: None,
-                                        }
-                                        .allocate(),
-                                        ValueBucket {
-                                            id: new_id(),
-                                            source_file_id: None,
-                                            line: 0,
-                                            message_id: 0,
-                                            parse_as: ValueType::U32,
-                                            op_aux_no: 0,
-                                            value: 2,
-                                        }
-                                        .allocate(),
-                                    ],
-                                }
-                                .allocate(),
-                                bounded_fn: None,
-                            }
-                            .allocate(),
-                            //   (store 1 (compute add (load 1) 1))
-                            StoreBucket {
-                                id: new_id(),
-                                source_file_id: None,
-                                line: 0,
-                                message_id: 0,
-                                context: InstrContext { size: 0 },
-                                dest_is_output: false,
-                                dest_address_type: AddressType::Variable,
-                                dest: LocationRule::Indexed {
-                                    location: ValueBucket {
-                                        id: new_id(),
-                                        source_file_id: None,
-                                        line: 0,
-                                        message_id: 0,
-                                        parse_as: ValueType::U32,
-                                        op_aux_no: 0,
-                                        value: 1,
-                                    }
-                                    .allocate(),
-                                    template_header: None,
-                                },
-                                src: ComputeBucket {
-                                    id: new_id(),
-                                    source_file_id: None,
-                                    line: 0,
-                                    message_id: 0,
-                                    op: OperatorType::Add,
-                                    op_aux_no: 0,
-                                    stack: vec![
-                                        LoadBucket {
-                                            id: new_id(),
-                                            source_file_id: None,
-                                            line: 0,
-                                            message_id: 0,
-                                            address_type: AddressType::Variable,
-                                            src: LocationRule::Indexed {
-                                                location: ValueBucket {
-                                                    id: new_id(),
-                                                    source_file_id: None,
-                                                    line: 0,
-                                                    message_id: 0,
-                                                    parse_as: ValueType::U32,
-                                                    op_aux_no: 0,
-                                                    value: 1,
-                                                }
-                                                .allocate(),
-                                                template_header: Some("test_0".to_string()),
-                                            },
-                                            bounded_fn: None,
-                                        }
-                                        .allocate(),
-                                        ValueBucket {
-                                            id: new_id(),
-                                            source_file_id: None,
-                                            line: 0,
-                                            message_id: 0,
-                                            parse_as: ValueType::U32,
-                                            op_aux_no: 0,
-                                            value: 1,
-                                        }
-                                        .allocate(),
-                                    ],
-                                }
-                                .allocate(),
-                                bounded_fn: None,
-                            }
-                            .allocate(),
-                        ],
-                    }
-                    .allocate(), // ))
-                ],
-                var_stack_depth: 0,
-                expression_stack_depth: 0,
-                signal_stack_depth: 0,
-                number_of_components: 0,
-            })],
-            functions: vec![],
-        }
-    }
-}
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
new file mode 100644
index 000000000..a093cd7af
--- /dev/null
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -0,0 +1,259 @@
+use std::cell::{RefCell, Ref};
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::vec;
+use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
+use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
+use compiler::hir::very_concrete_program::Param;
+use compiler::intermediate_representation::{
+    BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
+};
+use compiler::intermediate_representation::ir_interface::*;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::loop_unroll::extracted_location_updater::ExtractedFunctionLocationUpdater;
+use crate::passes::LOOP_BODY_FN_PREFIX;
+use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
+
+use super::new_u32_value;
+
+#[derive(Clone, Debug, Eq, PartialEq, Default)]
+pub struct LoopBodyExtractor {
+    new_body_functions: RefCell<Vec<FunctionCode>>,
+}
+
+impl LoopBodyExtractor {
+    pub fn get_new_functions(&self) -> Ref<Vec<FunctionCode>> {
+        self.new_body_functions.borrow()
+    }
+
+    /// Replaces the iterations of a loop with calls to a new function holding its body.
+    ///
+    /// Creates one function from the body of `bucket` and appends to `unrolled` a call to
+    /// it for each iteration counted by `recorder`. Each call passes the local variable
+    /// storage, the signal storage, and any index values that vary per iteration.
+    pub fn extract(
+        &self,
+        bucket: &LoopBucket,
+        recorder: &EnvRecorder,
+        unrolled: &mut InstructionList,
+    ) {
+        assert!(bucket.body.len() > 1);
+        let (iter_to_loc, mut bucket_arg_order) = Self::compute_extra_args(recorder);
+        let name = self.build_new_body(bucket, &mut bucket_arg_order);
+        for iter_num in 0..recorder.get_iter() {
+            // NOTE: CallBucket arguments must use a LoadBucket to reference the necessary
+            //  pointers within the current body. No load instruction is actually needed to
+            //  pass these pointers as parameters to the function, so the `bounded_fn` field
+            //  of the LoadBucket names the identity function to perform the "loading"
+            //  (it just returns the pointer that was passed in).
+            let mut args: InstructionList = vec![
+                // Parameter for local vars
+                Self::new_storage_ptr_ref(bucket, AddressType::Variable),
+                // Parameter for signals/arena
+                Self::new_storage_ptr_ref(bucket, AddressType::Signal),
+            ];
+            // Additional parameters for variant vector/array access within the loop
+            if !iter_to_loc.is_empty() {
+                args.extend(iter_to_loc[&iter_num].iter().map(|(addr_ty, index)| {
+                    Self::new_indexed_storage_ptr_ref(bucket, addr_ty.clone(), index.get_u32())
+                }));
+            }
+            let call = CallBucket {
+                id: new_id(),
+                source_file_id: bucket.source_file_id,
+                line: bucket.line,
+                message_id: bucket.message_id,
+                symbol: name.clone(),
+                return_info: ReturnType::Intermediate { op_aux_no: 0 },
+                arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+                argument_types: vec![], // LLVM IR generation doesn't use this field
+                arguments: args,
+            };
+            unrolled.push(call.allocate());
+        }
+    }
+
+    fn build_new_body(
+        &self,
+        bucket: &LoopBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) -> String {
+        // NOTE: must create parameter list before 'bucket_arg_order' is modified
+        let mut params = vec![
+            Param { name: String::from("lvars"), length: vec![0] },
+            Param { name: String::from("signals"), length: vec![0] },
+        ];
+        for i in 0..bucket_arg_order.len() {
+            params.push(Param { name: format!("fixed_{}", i), length: vec![0] });
+        }
+
+        // Copy loop body and add a "return void" at the end
+        let mut new_body = vec![];
+        for s in &bucket.body {
+            let mut copy: InstructionPointer = s.clone();
+            if !bucket_arg_order.is_empty() {
+                //Traverse each cloned statement before calling `update_id()` and replace the
+                //  old location reference with reference to the proper argument. Mappings are
+                //  removed as they are processed so no change is needed once the map is empty.
+                ExtractedFunctionLocationUpdater::check_instruction(&mut copy, bucket_arg_order);
+            }
+            copy.update_id();
+            new_body.push(copy);
+        }
+        assert!(bucket_arg_order.is_empty());
+        new_body.push(
+            ReturnBucket {
+                id: new_id(),
+                source_file_id: bucket.source_file_id,
+                line: bucket.line,
+                message_id: bucket.message_id,
+                with_size: usize::MAX, // size > 1 will produce "return void" LLVM instruction
+                value: NopBucket { id: new_id() }.allocate(),
+            }
+            .allocate(),
+        );
+        // Create new function to hold the copied body
+        // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
+        let func_name = format!("{}{}", LOOP_BODY_FN_PREFIX, new_id());
+        let new_func = Box::new(FunctionCodeInfo {
+            source_file_id: bucket.source_file_id,
+            line: bucket.line,
+            name: func_name.clone(),
+            header: func_name.clone(),
+            body: new_body,
+            params,
+            returns: vec![], // void return type on the function
+            ..FunctionCodeInfo::default()
+        });
+        // Store the function to be transformed and added to circuit later
+        self.new_body_functions.borrow_mut().push(new_func);
+        func_name
+    }
+
+    fn new_custom_fn_load_bucket(
+        bucket: &dyn ObtainMeta,
+        load_fun: &str,
+        addr_type: AddressType,
+        location: InstructionPointer,
+    ) -> InstructionPointer {
+        LoadBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            address_type: addr_type,
+            src: LocationRule::Indexed { location, template_header: None },
+            bounded_fn: Some(String::from(load_fun)),
+        }
+        .allocate()
+    }
+
+    fn new_storage_ptr_ref(bucket: &dyn ObtainMeta, addr_type: AddressType) -> InstructionPointer {
+        Self::new_custom_fn_load_bucket(
+            bucket,
+            FR_IDENTITY_ARR_PTR,
+            addr_type,
+            new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+        )
+    }
+
+    //NOTE: When the 'bounded_fn' for LoadBucket is Some(_), the index parameter
+    //  is ignored so we must instead use `FR_INDEX_ARR_PTR` to apply the index.
+    //  Uses of that function can be inlined later.
+    // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
+    fn new_indexed_storage_ptr_ref(
+        bucket: &dyn ObtainMeta,
+        addr_type: AddressType,
+        index: usize,
+    ) -> InstructionPointer {
+        CallBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            symbol: String::from(FR_INDEX_ARR_PTR),
+            return_info: ReturnType::Intermediate { op_aux_no: 0 },
+            arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+            argument_types: vec![], // LLVM IR generation doesn't use this field
+            arguments: vec![
+                Self::new_storage_ptr_ref(bucket, addr_type),
+                new_u32_value(bucket, index),
+            ],
+        }
+        .allocate()
+    }
+
+    /// Checks whether every element of `data` holds the same value.
+    ///
+    /// An empty slice and a slice with a single element both produce `true` because
+    /// there is no pair of elements that could differ. It is used to assert that every
+    /// iteration was given the same number of extra arguments.
+    fn is_all_same(data: &[usize]) -> bool {
+        match data.split_first() {
+            // No elements means there is nothing that could differ.
+            None => true,
+            // Every remaining element must match the first one.
+            Some((first, rest)) => rest.iter().all(|elem| elem == first),
+        }
+    }
+
+    /// Determines which load/store locations vary between loop iterations and therefore
+    /// must be passed to the extracted body function as extra arguments.
+    ///
+    /// Returns `(iter_to_loc, bucket_arg_order)`: the first maps an iteration number to the
+    /// `(AddressType, Value)` pairs to pass in that iteration and the second maps each
+    /// varying load/store `BucketId` to the position of its extra argument.
+    ///
+    /// Panics if a varying index is `Unknown` or the `AddressType` of a bucket changes.
+    fn compute_extra_args(
+        recorder: &EnvRecorder,
+    ) -> (HashMap<usize, Vec<(AddressType, Value)>>, BTreeMap<BucketId, usize>) {
+        let mut iter_to_loc: HashMap<usize, Vec<(AddressType, Value)>> = HashMap::default();
+        let mut bucket_arg_order = BTreeMap::new();
+        let num_iters = recorder.get_iter();
+        let vpi = recorder.vals_per_iteration.borrow();
+        let all_loadstore_bucket_ids: HashSet<&BucketId> =
+            vpi.values().flat_map(|x| x.loadstore_to_index.keys()).collect();
+        for id in all_loadstore_bucket_ids {
+            // Check if the computed index value is the same across all iterations for this
+            //  BucketId. If not, it must be passed as a separate parameter to the new function.
+            // NOTE: Conditional branches can leave a bucket unused in some iterations. Those
+            //  iterations are skipped when comparing. Unknown values are stored in
+            //  `loadstore_to_index` too so that they are not confused with missing values.
+            let mut next_iter_to_store = 0;
+            let mut prev_val: Option<&(AddressType, Value)> = None;
+            for curr_iter in 0..num_iters {
+                if let Some(curr_val_pair) = vpi[&curr_iter].loadstore_to_index.get(id) {
+                    if let Some(prev_val_pair) = prev_val {
+                        assert_eq!(prev_val_pair.0, curr_val_pair.0); //AddressType always matches
+                        if !Value::eq(&prev_val_pair.1, &curr_val_pair.1) {
+                            assert!(!prev_val_pair.1.is_unknown() && !curr_val_pair.1.is_unknown());
+                            // Store previous Value for all iterations that had the same
+                            //  value (or None) and have not yet been stored.
+                            for j in next_iter_to_store..curr_iter {
+                                iter_to_loc.entry(j).or_default().push(prev_val_pair.clone());
+                            }
+                            // Store current Value for current iteration
+                            iter_to_loc.entry(curr_iter).or_default().push(curr_val_pair.clone());
+                            next_iter_to_store = curr_iter + 1;
+                            prev_val = Some(curr_val_pair);
+                        }
+                    } else {
+                        prev_val = Some(curr_val_pair); //initial state
+                    }
+                }
+            }
+            if next_iter_to_store != 0 {
+                // A value that varied must be passed in every iteration, so also store the
+                //  last Value for trailing iterations with an unchanged or missing value.
+                let last_val_pair = prev_val.expect("a varying index always has a last value");
+                for j in next_iter_to_store..num_iters {
+                    iter_to_loc.entry(j).or_default().push(last_val_pair.clone());
+                }
+                bucket_arg_order.insert(id.clone(), bucket_arg_order.len());
+            }
+            //ASSERT: All vectors have the same length at the end of each iteration
+            assert!(Self::is_all_same(&iter_to_loc.values().map(|x| x.len()).collect::<Vec<_>>()));
+        }
+        (iter_to_loc, bucket_arg_order)
+    }
+}
diff --git a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
new file mode 100644
index 000000000..c21c4d36c
--- /dev/null
+++ b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
@@ -0,0 +1,189 @@
+use std::collections::BTreeMap;
+use compiler::intermediate_representation::{BucketId, InstructionPointer};
+use compiler::intermediate_representation::ir_interface::*;
+use super::new_u32_value;
+
+pub struct ExtractedFunctionLocationUpdater {}
+
+impl ExtractedFunctionLocationUpdater {
+    fn check_load_bucket(
+        bucket: &mut LoadBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        if let Some(x) = bucket_arg_order.remove(&bucket.id) {
+            // Update the source information to reference the argument
+            //NOTE: This can't use AddressType::Variable or AddressType::Signal
+            //  because ExtractedFunctionLLVMIRProducer references the first two
+            //  parameters with those. So this has to use SubcmpSignal (it should
+            //  work fine because subcomps will also just be additional params).
+            bucket.address_type = AddressType::SubcmpSignal {
+                cmp_address: new_u32_value(bucket, x),
+                uniform_parallel_value: None,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+            };
+            bucket.src = LocationRule::Indexed {
+                location: new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+                template_header: None,
+            };
+        } else {
+            // If not replacing, check deeper in the AddressType and LocationRule
+            Self::check_address_type(&mut bucket.address_type, bucket_arg_order);
+            Self::check_location_rule(&mut bucket.src, bucket_arg_order);
+        }
+    }
+
+    fn check_store_bucket(
+        bucket: &mut StoreBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        // Check the source/RHS of the store in either case
+        Self::check_instruction(&mut bucket.src, bucket_arg_order);
+        //
+        if let Some(x) = bucket_arg_order.remove(&bucket.id) {
+            // Update the destination information to reference the argument
+            bucket.dest_address_type = AddressType::SubcmpSignal {
+                cmp_address: new_u32_value(bucket, x),
+                uniform_parallel_value: None,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+            };
+            bucket.dest = LocationRule::Indexed {
+                location: new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+                template_header: None,
+            };
+        } else {
+            // If not replacing, check deeper in the AddressType and LocationRule
+            Self::check_address_type(&mut bucket.dest_address_type, bucket_arg_order);
+            Self::check_location_rule(&mut bucket.dest, bucket_arg_order);
+        }
+    }
+
+    fn check_location_rule(
+        location_rule: &mut LocationRule,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        match location_rule {
+            LocationRule::Indexed { location, .. } => {
+                Self::check_instruction(location, bucket_arg_order);
+            }
+            LocationRule::Mapped { .. } => unreachable!(),
+        }
+    }
+
+    fn check_address_type(
+        addr_type: &mut AddressType,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        if let AddressType::SubcmpSignal { cmp_address, .. } = addr_type {
+            Self::check_instruction(cmp_address, bucket_arg_order);
+        }
+    }
+
+    fn check_compute_bucket(
+        bucket: &mut ComputeBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        for i in &mut bucket.stack {
+            Self::check_instruction(i, bucket_arg_order);
+        }
+    }
+
+    fn check_assert_bucket(
+        bucket: &mut AssertBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        Self::check_instruction(&mut bucket.evaluate, bucket_arg_order);
+    }
+
+    fn check_loop_bucket(
+        bucket: &mut LoopBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_create_cmp_bucket(
+        bucket: &mut CreateCmpBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    /// Visits the instruction wrapped by a constraint. Both kinds of constraint
+    /// hold a single instruction so the two variants are handled identically
+    /// by forwarding that instruction to `check_instruction`.
+    fn check_constraint_bucket(
+        bucket: &mut ConstraintBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        let wrapped = match bucket {
+            ConstraintBucket::Substitution(i) | ConstraintBucket::Equality(i) => i,
+        };
+        Self::check_instruction(wrapped, bucket_arg_order);
+    }
+
+    fn check_block_bucket(
+        bucket: &mut BlockBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_call_bucket(
+        bucket: &mut CallBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_branch_bucket(
+        bucket: &mut BranchBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        todo!()
+    }
+
+    fn check_return_bucket(
+        bucket: &mut ReturnBucket,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        Self::check_instruction(&mut bucket.value, bucket_arg_order);
+    }
+
+    fn check_log_bucket(bucket: &mut LogBucket, bucket_arg_order: &mut BTreeMap<BucketId, usize>) {
+        for arg in &mut bucket.argsprint {
+            if let LogBucketArg::LogExp(i) = arg {
+                Self::check_instruction(i, bucket_arg_order);
+            }
+        }
+    }
+
+    //Nothing to do
+    fn check_value_bucket(_: &mut ValueBucket, _: &mut BTreeMap<BucketId, usize>) {}
+    fn check_nop_bucket(_: &mut NopBucket, _: &mut BTreeMap<BucketId, usize>) {}
+
+    pub fn check_instruction(
+        inst: &mut InstructionPointer,
+        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+    ) {
+        match inst.as_mut() {
+            Instruction::Value(ref mut b) => Self::check_value_bucket(b, bucket_arg_order),
+            Instruction::Load(ref mut b) => Self::check_load_bucket(b, bucket_arg_order),
+            Instruction::Store(ref mut b) => Self::check_store_bucket(b, bucket_arg_order),
+            Instruction::Compute(ref mut b) => Self::check_compute_bucket(b, bucket_arg_order),
+            Instruction::Call(ref mut b) => Self::check_call_bucket(b, bucket_arg_order),
+            Instruction::Branch(ref mut b) => Self::check_branch_bucket(b, bucket_arg_order),
+            Instruction::Return(ref mut b) => Self::check_return_bucket(b, bucket_arg_order),
+            Instruction::Assert(ref mut b) => Self::check_assert_bucket(b, bucket_arg_order),
+            Instruction::Log(ref mut b) => Self::check_log_bucket(b, bucket_arg_order),
+            Instruction::Loop(ref mut b) => Self::check_loop_bucket(b, bucket_arg_order),
+            Instruction::CreateCmp(ref mut b) => Self::check_create_cmp_bucket(b, bucket_arg_order),
+            Instruction::Constraint(ref mut b) => {
+                Self::check_constraint_bucket(b, bucket_arg_order)
+            }
+            Instruction::Block(ref mut b) => Self::check_block_bucket(b, bucket_arg_order),
+            Instruction::Nop(ref mut b) => Self::check_nop_bucket(b, bucket_arg_order),
+        }
+    }
+}
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
new file mode 100644
index 000000000..0f52655ef
--- /dev/null
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -0,0 +1,216 @@
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::fmt::{Debug, Formatter};
+use compiler::intermediate_representation::BucketId;
+use compiler::intermediate_representation::ir_interface::*;
+use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::memory::PassMemory;
+
+/// Holds values of index variables at array loads/stores within a loop
+pub struct VariableValues<'a> {
+    pub env_at_header: Env<'a>,
+    pub loadstore_to_index: HashMap<BucketId, (AddressType, Value)>, // key is load/store bucket ID
+}
+
+impl<'a> VariableValues<'a> {
+    pub fn new(env_at_header: Env<'a>) -> Self {
+        VariableValues { env_at_header, loadstore_to_index: Default::default() }
+    }
+}
+
+impl Debug for VariableValues<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // write!(
+        //     f,
+        //     "\n{{\n env_at_header = {}\n loadstore_to_index = {:?}\n}}",
+        //     self.env_at_header, self.loadstore_to_index
+        // )
+        write!(f, "\n  loadstore_to_index = {:?}\n", self.loadstore_to_index)
+    }
+}
+
+pub struct EnvRecorder<'a> {
+    mem: &'a PassMemory,
+    // NOTE: RefCell is needed here because the instance of this struct is borrowed by
+    //  the main interpreter while we also need to mutate these internal structures.
+    current_iter_num: RefCell<usize>,
+    safe_to_move: RefCell<bool>,
+    pub vals_per_iteration: RefCell<HashMap<usize, VariableValues<'a>>>, // key is iteration number
+}
+
+impl Debug for EnvRecorder<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "\n current_iter_num = {}\n safe_to_move = {:?}\n vals_per_iteration = {:?}",
+            self.current_iter_num.borrow(),
+            self.safe_to_move.borrow(),
+            self.vals_per_iteration.borrow(),
+        )
+    }
+}
+
+impl<'a> EnvRecorder<'a> {
+    pub fn new(mem: &'a PassMemory) -> Self {
+        EnvRecorder {
+            mem,
+            vals_per_iteration: Default::default(),
+            current_iter_num: RefCell::new(0),
+            safe_to_move: RefCell::new(true),
+        }
+    }
+
+    pub fn is_safe_to_move(&self) -> bool {
+        *self.safe_to_move.borrow()
+    }
+
+    pub fn increment_iter(&self) {
+        *self.current_iter_num.borrow_mut() += 1;
+    }
+
+    pub fn get_iter(&self) -> usize {
+        *self.current_iter_num.borrow()
+    }
+
+    pub fn record_env_at_header(&self, env: Env<'a>) {
+        let iter = self.get_iter();
+        assert!(!self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration.borrow_mut().insert(iter, VariableValues::new(env));
+    }
+
+    pub fn get_header_env_clone(&self) -> Env {
+        let iter = self.get_iter();
+        assert!(self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration.borrow().get(&iter).unwrap().env_at_header.clone()
+    }
+
+    fn record_memloc_at_bucket(&self, bucket_id: &BucketId, addr_ty: AddressType, val: Value) {
+        let iter = self.get_iter();
+        assert!(self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration
+            .borrow_mut()
+            .get_mut(&iter)
+            .unwrap()
+            .loadstore_to_index
+            .insert(*bucket_id, (addr_ty, val));
+    }
+
+    /// Evaluates the index of `loc`, returning `Value::Unknown` when the index cannot
+    /// be shown to have the same value at the loop header as at the current location.
+    fn compute_index(&self, loc: &LocationRule, env: &Env) -> Value {
+        let location = match loc {
+            LocationRule::Indexed { location, .. } => location,
+            LocationRule::Mapped { .. } => {
+                todo!(); //not sure if/how to handle that
+            }
+        };
+        // The index is evaluated with the current environment and with the environment
+        //  from the loop header. If either result is missing or the two differ, moving
+        //  the loop body to another function is not safe because the index computation
+        //  may give a different result when done at the call site outside that function.
+        let interp = self.mem.build_interpreter(self);
+        let (idx_loc, _) = interp.execute_instruction(location, env.clone(), false);
+        let idx_loc = match idx_loc {
+            Some(v) => v,
+            None => return Value::Unknown,
+        };
+        let header_env = self.get_header_env_clone();
+        let (idx_header, _) = interp.execute_instruction(location, header_env, false);
+        match idx_header {
+            Some(ref v) if Value::eq(v, &idx_loc) => idx_loc,
+            _ => Value::Unknown,
+        }
+    }
+
+    fn check(&self, bucket_id: &BucketId, addr_ty: &AddressType, loc: &LocationRule, env: &Env) {
+        let val_result = self.compute_index(loc, env);
+        if val_result == Value::Unknown {
+            println!("NOT safe to move {}: {:?}[{:?}]", bucket_id, addr_ty, loc); //TODO: TEMP
+            self.safe_to_move.replace(false);
+        }
+        //NOTE: must record even when Unknown to ensure that Unknown
+        //  value is not confused with missing values for an iteration
+        //  that can be caused by conditionals within the loop.
+        self.record_memloc_at_bucket(bucket_id, addr_ty.clone(), val_result);
+    }
+}
+
+impl InterpreterObserver for EnvRecorder<'_> {
+    fn on_load_bucket(&self, bucket: &LoadBucket, env: &Env) -> bool {
+        if let Some(_) = bucket.bounded_fn {
+            todo!(); //not sure if/how to handle that
+        }
+        self.check(&bucket.id, &bucket.address_type, &bucket.src, env);
+        true
+    }
+
+    fn on_store_bucket(&self, bucket: &StoreBucket, env: &Env) -> bool {
+        if let Some(_) = bucket.bounded_fn {
+            todo!(); //not sure if/how to handle that
+        }
+        self.check(&bucket.id, &bucket.dest_address_type, &bucket.dest, env);
+        true
+    }
+
+    fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_compute_bucket(&self, _bucket: &ComputeBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_assert_bucket(&self, _bucket: &AssertBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_loop_bucket(&self, _bucket: &LoopBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_create_cmp_bucket(&self, _bucket: &CreateCmpBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_constraint_bucket(&self, _bucket: &ConstraintBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_block_bucket(&self, _bucket: &BlockBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_nop_bucket(&self, _bucket: &NopBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_location_rule(&self, _location_rule: &LocationRule, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_call_bucket(&self, _bucket: &CallBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_branch_bucket(&self, _bucket: &BranchBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_return_bucket(&self, _bucket: &ReturnBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_log_bucket(&self, _bucket: &LogBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn ignore_function_calls(&self) -> bool {
+        true
+    }
+
+    fn ignore_subcmp_calls(&self) -> bool {
+        true
+    }
+}
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
new file mode 100644
index 000000000..be6b734c8
--- /dev/null
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -0,0 +1,587 @@
+mod loop_env_recorder;
+mod extracted_location_updater;
+mod body_extractor;
+
+use std::cell::RefCell;
+use std::collections::BTreeMap;
+use std::vec;
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use compiler::compiler_interface::Circuit;
+use compiler::intermediate_representation::{
+    BucketId, InstructionList, InstructionPointer, new_id, UpdateId, ToSExp,
+};
+use compiler::intermediate_representation::ir_interface::*;
+use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::passes::CircuitTransformationPass;
+use crate::passes::memory::PassMemory;
+use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
+
+use self::body_extractor::LoopBodyExtractor;
+
+const EXTRACT_LOOP_BODY_TO_NEW_FUNC: bool = true;
+
+pub fn new_u32_value(bucket: &dyn ObtainMeta, val: usize) -> InstructionPointer {
+    ValueBucket {
+        id: new_id(),
+        source_file_id: bucket.get_source_file_id().clone(),
+        line: bucket.get_line(),
+        message_id: bucket.get_message_id(),
+        parse_as: ValueType::U32,
+        op_aux_no: 0,
+        value: val,
+    }
+    .allocate()
+}
+
+pub struct LoopUnrollPass {
+    memory: PassMemory,
+    extractor: LoopBodyExtractor,
+    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
+    replacements: RefCell<BTreeMap<BucketId, InstructionPointer>>,
+}
+
+impl LoopUnrollPass {
+    pub fn new(prime: &String) -> Self {
+        LoopUnrollPass {
+            memory: PassMemory::new(prime, String::from(""), Default::default()),
+            replacements: Default::default(),
+            extractor: Default::default(),
+        }
+    }
+
+    fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
+        {
+            println!("\nTry unrolling loop {}:", bucket.id); //TODO: TEMP
+            for (i, s) in bucket.body.iter().enumerate() {
+                println!("[{}/{}]{}", i + 1, bucket.body.len(), s.to_sexp().to_pretty(100));
+            }
+            for (i, s) in bucket.body.iter().enumerate() {
+                println!("[{}/{}]{:?}", i + 1, bucket.body.len(), s);
+            }
+            println!("LOOP ENTRY env {}", env); //TODO: TEMP
+        }
+        // Compute loop iteration count. If unknown, return immediately.
+        let recorder = EnvRecorder::new(&self.memory);
+        {
+            //TODO: This has the wrong scope if an inner function w/ fixed params will be processed! Need test case for it.
+            //  Can't make it crash. Maybe it's not activating in current setup, it was only when I tried to process the other functions?
+            let interpreter = self.memory.build_interpreter(&recorder);
+            let mut inner_env = env.clone();
+            loop {
+                recorder.record_env_at_header(inner_env.clone());
+                let (_, cond, new_env) =
+                    interpreter.execute_loop_bucket_once(bucket, inner_env, true);
+                match cond {
+                    // If the conditional becomes unknown just give up.
+                    None => return (None, 0),
+                    // When conditional becomes `false`, iteration count is complete.
+                    Some(false) => break,
+                    // Otherwise, continue counting.
+                    Some(true) => recorder.increment_iter(),
+                };
+                inner_env = new_env;
+            }
+        }
+        println!("recorder = {:?}", recorder); //TODO: TEMP
+
+        let mut block_body = vec![];
+        if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() {
+            // If the loop body contains more than one instruction, extract it into a new
+            // function and generate 'recorder.get_iter()' number of calls to that function.
+            // Otherwise, just duplicate the body 'recorder.get_iter()' number of times.
+            match &bucket.body[..] {
+                [a] => {
+                    for _ in 0..recorder.get_iter() {
+                        let mut copy = a.clone();
+                        copy.update_id();
+                        block_body.push(copy);
+                    }
+                }
+                _ => {
+                    self.extractor.extract(bucket, &recorder, &mut block_body);
+                }
+            }
+        } else {
+            //If the loop body is not safe to move into a new function, just unroll.
+            for _ in 0..recorder.get_iter() {
+                for s in &bucket.body {
+                    let mut copy = s.clone();
+                    copy.update_id();
+                    block_body.push(copy);
+                }
+            }
+        }
+        (Some(block_body), recorder.get_iter())
+    }
+
+    // Interprets the unrolled loop body (wrapped in a block),
+    // checking whether new loop buckets appear inside it.
+    fn continue_inside(&self, bucket: &BlockBucket, env: &Env) {
+        println!("\ncontinue_inside {:?} with {} ", bucket, env);
+        let interpreter = self.memory.build_interpreter(self);
+        interpreter.execute_block_bucket(bucket, env.clone(), true);
+    }
+}
+
+impl InterpreterObserver for LoopUnrollPass {
+    fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_load_bucket(&self, _bucket: &LoadBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_store_bucket(&self, _bucket: &StoreBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_compute_bucket(&self, _bucket: &ComputeBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_assert_bucket(&self, _bucket: &AssertBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_loop_bucket(&self, bucket: &LoopBucket, env: &Env) -> bool {
+        if let (Some(block_body), n_iters) = self.try_unroll_loop(bucket, env) {
+            let block = BlockBucket {
+                id: new_id(),
+                source_file_id: bucket.source_file_id,
+                line: bucket.line,
+                message_id: bucket.message_id,
+                body: block_body,
+                n_iters,
+            };
+            self.continue_inside(&block, env);
+            self.replacements.borrow_mut().insert(bucket.id, block.allocate());
+        }
+        false
+    }
+
+    fn on_create_cmp_bucket(&self, _bucket: &CreateCmpBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_constraint_bucket(&self, _bucket: &ConstraintBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_block_bucket(&self, _bucket: &BlockBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_nop_bucket(&self, _bucket: &NopBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_location_rule(&self, _location_rule: &LocationRule, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_call_bucket(&self, _bucket: &CallBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_branch_bucket(&self, _bucket: &BranchBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_return_bucket(&self, _bucket: &ReturnBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn on_log_bucket(&self, _bucket: &LogBucket, _env: &Env) -> bool {
+        true
+    }
+
+    fn ignore_function_calls(&self) -> bool {
+        true
+    }
+
+    fn ignore_subcmp_calls(&self) -> bool {
+        true
+    }
+}
+
+impl CircuitTransformationPass for LoopUnrollPass {
+    fn name(&self) -> &str {
+        "LoopUnrollPass"
+    }
+
+    fn pre_hook_circuit(&self, circuit: &Circuit) {
+        self.memory.fill_from_circuit(circuit);
+    }
+
+    fn post_hook_circuit(&self, cir: &mut Circuit) {
+        // Normalize return type on source functions for "WriteLLVMIR for Circuit"
+        //  which treats a 1-D vector of size 1 as a scalar return and an empty
+        //  vector as "void" return type (the initial Circuit builder uses empty
+        //  for scalar returns because it doesn't consider "void" return possible).
+        for f in &mut cir.functions {
+            if f.returns.is_empty() {
+                f.returns = vec![1];
+            }
+        }
+        // Transform and add the new body functions
+        for f in self.extractor.get_new_functions().iter() {
+            cir.functions.push(self.transform_function(&f));
+        }
+    }
+
+    fn pre_hook_template(&self, template: &TemplateCode) {
+        self.memory.set_scope(template);
+        self.memory.run_template(self, template);
+    }
+
+    fn get_updated_field_constants(&self) -> Vec<String> {
+        self.memory.get_field_constants_clone()
+    }
+
+    fn transform_loop_bucket(&self, bucket: &LoopBucket) -> InstructionPointer {
+        if let Some(unrolled_loop) = self.replacements.borrow().get(&bucket.id) {
+            return self.transform_instruction(unrolled_loop);
+        }
+        LoopBucket {
+            id: new_id(),
+            source_file_id: bucket.source_file_id,
+            line: bucket.line,
+            message_id: bucket.message_id,
+            continue_condition: self.transform_instruction(&bucket.continue_condition),
+            body: self.transform_instructions(&bucket.body),
+        }
+        .allocate()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::collections::HashMap;
+    use compiler::circuit_design::template::TemplateCodeInfo;
+    use compiler::compiler_interface::Circuit;
+    use compiler::intermediate_representation::{Instruction, new_id};
+    use compiler::intermediate_representation::ir_interface::{
+        AddressType, Allocate, ComputeBucket, InstrContext, LoadBucket, LocationRule, LoopBucket,
+        OperatorType, StoreBucket, ValueBucket, ValueType,
+    };
+    use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX};
+    use crate::passes::loop_unroll::LoopUnrollPass;
+
+    /// Unrolling `example_program` must replace its loop with a block of 5 calls.
+    #[test]
+    fn test_loop_unrolling() {
+        let prime = "goldilocks".to_string();
+        let pass = LoopUnrollPass::new(&prime);
+        let mut circuit = example_program();
+        circuit.llvm_data.variable_index_mapping.insert("test_0".to_string(), HashMap::new());
+        circuit.llvm_data.signal_index_mapping.insert("test_0".to_string(), HashMap::new());
+        circuit.llvm_data.component_index_mapping.insert("test_0".to_string(), HashMap::new());
+        let new_circuit = pass.transform_circuit(&circuit);
+        if cfg!(debug_assertions) {
+            println!("{}", new_circuit.templates[0].body.last().unwrap().to_string());
+        }
+        assert_ne!(circuit, new_circuit);
+        match new_circuit.templates[0].body.last().unwrap().as_ref() {
+            Instruction::Block(b) => {
+                // 5 iterations unrolled into 5 call statements targeting extracted loop body functions
+                assert_eq!(b.body.len(), 5);
+                assert!(b.body.iter().all(|s| matches!(
+                    s.as_ref(),
+                    Instruction::Call(c) if c.symbol.starts_with(LOOP_BODY_FN_PREFIX)
+                )));
+            }
+            _ => panic!("expected the unrolled loop to be replaced by a Block bucket"),
+        }
+    }
+
+    fn example_program() -> Circuit {
+        Circuit {
+            wasm_producer: Default::default(),
+            c_producer: Default::default(),
+            llvm_data: Default::default(),
+            templates: vec![Box::new(TemplateCodeInfo {
+                id: 0,
+                source_file_id: None,
+                line: 0,
+                header: "test_0".to_string(),
+                name: "test".to_string(),
+                is_parallel: false,
+                is_parallel_component: false,
+                is_not_parallel_component: false,
+                has_parallel_sub_cmp: false,
+                number_of_inputs: 0,
+                number_of_outputs: 0,
+                number_of_intermediates: 0,
+                body: vec![
+                    // (store 0 0)
+                    StoreBucket {
+                        id: new_id(),
+                        source_file_id: None,
+                        line: 0,
+                        message_id: 0,
+                        context: InstrContext { size: 0 },
+                        dest_is_output: false,
+                        dest_address_type: AddressType::Variable,
+                        dest: LocationRule::Indexed {
+                            location: ValueBucket {
+                                id: new_id(),
+                                source_file_id: None,
+                                line: 0,
+                                message_id: 0,
+                                parse_as: ValueType::U32,
+                                op_aux_no: 0,
+                                value: 0,
+                            }
+                            .allocate(),
+                            template_header: Some("test_0".to_string()),
+                        },
+                        src: ValueBucket {
+                            id: new_id(),
+                            source_file_id: None,
+                            line: 0,
+                            message_id: 0,
+                            parse_as: ValueType::U32,
+                            op_aux_no: 0,
+                            value: 0,
+                        }
+                        .allocate(),
+                        bounded_fn: None,
+                    }
+                    .allocate(),
+                    // (store 1 0)
+                    StoreBucket {
+                        id: new_id(),
+                        source_file_id: None,
+                        line: 0,
+                        message_id: 0,
+                        context: InstrContext { size: 0 },
+                        dest_is_output: false,
+                        dest_address_type: AddressType::Variable,
+                        dest: LocationRule::Indexed {
+                            location: ValueBucket {
+                                id: new_id(),
+                                source_file_id: None,
+                                line: 0,
+                                message_id: 0,
+                                parse_as: ValueType::U32,
+                                op_aux_no: 0,
+                                value: 1,
+                            }
+                            .allocate(),
+                            template_header: Some("test_0".to_string()),
+                        },
+                        src: ValueBucket {
+                            id: new_id(),
+                            source_file_id: None,
+                            line: 0,
+                            message_id: 0,
+                            parse_as: ValueType::U32,
+                            op_aux_no: 0,
+                            value: 0,
+                        }
+                        .allocate(),
+                        bounded_fn: None,
+                    }
+                    .allocate(),
+                    // (loop (compute lesser (load 1) 5) (
+                    LoopBucket {
+                        id: new_id(),
+                        source_file_id: None,
+                        line: 0,
+                        message_id: 0,
+                        continue_condition: ComputeBucket {
+                            id: new_id(),
+                            source_file_id: None,
+                            line: 0,
+                            message_id: 0,
+                            op: OperatorType::Lesser,
+                            op_aux_no: 0,
+                            stack: vec![
+                                LoadBucket {
+                                    id: new_id(),
+                                    source_file_id: None,
+                                    line: 0,
+                                    message_id: 0,
+                                    address_type: AddressType::Variable,
+                                    src: LocationRule::Indexed {
+                                        location: ValueBucket {
+                                            id: new_id(),
+                                            source_file_id: None,
+                                            line: 0,
+                                            message_id: 0,
+                                            parse_as: ValueType::U32,
+                                            op_aux_no: 0,
+                                            value: 1,
+                                        }
+                                        .allocate(),
+                                        template_header: Some("test_0".to_string()),
+                                    },
+                                    bounded_fn: None,
+                                }
+                                .allocate(),
+                                ValueBucket {
+                                    id: new_id(),
+                                    source_file_id: None,
+                                    line: 0,
+                                    message_id: 0,
+                                    parse_as: ValueType::U32,
+                                    op_aux_no: 0,
+                                    value: 5,
+                                }
+                                .allocate(),
+                            ],
+                        }
+                        .allocate(),
+                        body: vec![
+                            //   (store 0 (compute add (load 0) 2))
+                            StoreBucket {
+                                id: new_id(),
+                                source_file_id: None,
+                                line: 0,
+                                message_id: 0,
+                                context: InstrContext { size: 0 },
+                                dest_is_output: false,
+                                dest_address_type: AddressType::Variable,
+                                dest: LocationRule::Indexed {
+                                    location: ValueBucket {
+                                        id: new_id(),
+                                        source_file_id: None,
+                                        line: 0,
+                                        message_id: 0,
+                                        parse_as: ValueType::U32,
+                                        op_aux_no: 0,
+                                        value: 0,
+                                    }
+                                    .allocate(),
+                                    template_header: None,
+                                },
+                                src: ComputeBucket {
+                                    id: new_id(),
+                                    source_file_id: None,
+                                    line: 0,
+                                    message_id: 0,
+                                    op: OperatorType::Add,
+                                    op_aux_no: 0,
+                                    stack: vec![
+                                        LoadBucket {
+                                            id: new_id(),
+                                            source_file_id: None,
+                                            line: 0,
+                                            message_id: 0,
+                                            address_type: AddressType::Variable,
+                                            src: LocationRule::Indexed {
+                                                location: ValueBucket {
+                                                    id: new_id(),
+                                                    source_file_id: None,
+                                                    line: 0,
+                                                    message_id: 0,
+                                                    parse_as: ValueType::U32,
+                                                    op_aux_no: 0,
+                                                    value: 0,
+                                                }
+                                                .allocate(),
+                                                template_header: Some("test_0".to_string()),
+                                            },
+                                            bounded_fn: None,
+                                        }
+                                        .allocate(),
+                                        ValueBucket {
+                                            id: new_id(),
+                                            source_file_id: None,
+                                            line: 0,
+                                            message_id: 0,
+                                            parse_as: ValueType::U32,
+                                            op_aux_no: 0,
+                                            value: 2,
+                                        }
+                                        .allocate(),
+                                    ],
+                                }
+                                .allocate(),
+                                bounded_fn: None,
+                            }
+                            .allocate(),
+                            //   (store 1 (compute add (load 1) 1))
+                            StoreBucket {
+                                id: new_id(),
+                                source_file_id: None,
+                                line: 0,
+                                message_id: 0,
+                                context: InstrContext { size: 0 },
+                                dest_is_output: false,
+                                dest_address_type: AddressType::Variable,
+                                dest: LocationRule::Indexed {
+                                    location: ValueBucket {
+                                        id: new_id(),
+                                        source_file_id: None,
+                                        line: 0,
+                                        message_id: 0,
+                                        parse_as: ValueType::U32,
+                                        op_aux_no: 0,
+                                        value: 1,
+                                    }
+                                    .allocate(),
+                                    template_header: None,
+                                },
+                                src: ComputeBucket {
+                                    id: new_id(),
+                                    source_file_id: None,
+                                    line: 0,
+                                    message_id: 0,
+                                    op: OperatorType::Add,
+                                    op_aux_no: 0,
+                                    stack: vec![
+                                        LoadBucket {
+                                            id: new_id(),
+                                            source_file_id: None,
+                                            line: 0,
+                                            message_id: 0,
+                                            address_type: AddressType::Variable,
+                                            src: LocationRule::Indexed {
+                                                location: ValueBucket {
+                                                    id: new_id(),
+                                                    source_file_id: None,
+                                                    line: 0,
+                                                    message_id: 0,
+                                                    parse_as: ValueType::U32,
+                                                    op_aux_no: 0,
+                                                    value: 1,
+                                                }
+                                                .allocate(),
+                                                template_header: Some("test_0".to_string()),
+                                            },
+                                            bounded_fn: None,
+                                        }
+                                        .allocate(),
+                                        ValueBucket {
+                                            id: new_id(),
+                                            source_file_id: None,
+                                            line: 0,
+                                            message_id: 0,
+                                            parse_as: ValueType::U32,
+                                            op_aux_no: 0,
+                                            value: 1,
+                                        }
+                                        .allocate(),
+                                    ],
+                                }
+                                .allocate(),
+                                bounded_fn: None,
+                            }
+                            .allocate(),
+                        ],
+                    }
+                    .allocate(), // ))
+                ],
+                var_stack_depth: 0,
+                expression_stack_depth: 0,
+                signal_stack_depth: 0,
+                number_of_components: 0,
+            })],
+            functions: vec![],
+        }
+    }
+}

From 8e5a64217a4930e28e7211495257504cbbe488be Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 11 Sep 2023 22:47:58 -0500
Subject: [PATCH 18/42] move memory.rs to interpreter folder

---
 circuit_passes/src/{passes => bucket_interpreter}/memory.rs   | 0
 circuit_passes/src/bucket_interpreter/mod.rs                  | 3 ++-
 circuit_passes/src/bucket_interpreter/value.rs                | 2 +-
 circuit_passes/src/passes/conditional_flattening.rs           | 2 +-
 .../src/passes/deterministic_subcomponent_invocation.rs       | 2 +-
 circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs    | 2 +-
 circuit_passes/src/passes/loop_unroll/mod.rs                  | 3 +--
 circuit_passes/src/passes/mapped_to_indexed.rs                | 2 +-
 circuit_passes/src/passes/mod.rs                              | 1 -
 circuit_passes/src/passes/simplification.rs                   | 2 +-
 circuit_passes/src/passes/unknown_index_sanitization.rs       | 4 ++--
 11 files changed, 11 insertions(+), 12 deletions(-)
 rename circuit_passes/src/{passes => bucket_interpreter}/memory.rs (100%)

diff --git a/circuit_passes/src/passes/memory.rs b/circuit_passes/src/bucket_interpreter/memory.rs
similarity index 100%
rename from circuit_passes/src/passes/memory.rs
rename to circuit_passes/src/bucket_interpreter/memory.rs
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 5f8e37b7d..ae92f4604 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -1,5 +1,6 @@
 pub mod value;
 pub mod env;
+pub mod memory;
 pub mod observer;
 pub(crate) mod operations;
 
@@ -11,10 +12,10 @@ use compiler::num_bigint::BigInt;
 use observer::InterpreterObserver;
 use program_structure::constants::UsefulConstants;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::{JoinSemiLattice, Value};
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
-use crate::passes::memory::PassMemory;
 use crate::passes::LOOP_BODY_FN_PREFIX;
 
 pub struct BucketInterpreter<'a> {
diff --git a/circuit_passes/src/bucket_interpreter/value.rs b/circuit_passes/src/bucket_interpreter/value.rs
index 3ba9df343..954b32e0a 100644
--- a/circuit_passes/src/bucket_interpreter/value.rs
+++ b/circuit_passes/src/bucket_interpreter/value.rs
@@ -4,8 +4,8 @@ use compiler::num_bigint::BigInt;
 use compiler::num_traits::ToPrimitive;
 use compiler::intermediate_representation::new_id;
 use circom_algebra::modular_arithmetic;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
-use crate::passes::memory::PassMemory;
 
 pub trait JoinSemiLattice {
     fn join(&self, other: &Self) -> Self;
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index faba64d1e..2c6128d7f 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -5,9 +5,9 @@ use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
 
 pub struct ConditionalFlattening {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
diff --git a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
index 99b612143..a834f7535 100644
--- a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
+++ b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
@@ -5,9 +5,9 @@ use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::ir_interface::*;
 use compiler::intermediate_representation::ir_interface::StatusInput::{Last, NoLast};
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
 
 pub struct DeterministicSubCmpInvokePass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 0f52655ef..6895f434d 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -4,9 +4,9 @@ use std::fmt::{Debug, Formatter};
 use compiler::intermediate_representation::BucketId;
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::memory::PassMemory;
 
 /// Holds values of index variables at array loads/stores within a loop
 pub struct VariableValues<'a> {
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index be6b734c8..e7fab158a 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -5,7 +5,6 @@ mod body_extractor;
 use std::cell::RefCell;
 use std::collections::BTreeMap;
 use std::vec;
-use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{
@@ -13,9 +12,9 @@ use compiler::intermediate_representation::{
 };
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
 use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
 
 use self::body_extractor::LoopBodyExtractor;
diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index 794ad73cc..31ee34405 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -5,11 +5,11 @@ use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::ir_interface::*;
 use compiler::intermediate_representation::{InstructionPointer, UpdateId};
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::Value::KnownU32;
 use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
 
 pub struct MappedToIndexedPass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index a10f1e9bf..bbe9c107e 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -20,7 +20,6 @@ mod deterministic_subcomponent_invocation;
 mod mapped_to_indexed;
 mod unknown_index_sanitization;
 mod checks;
-pub(crate) mod memory;
 
 pub const LOOP_BODY_FN_PREFIX: &str = const_format::concatcp!(GENERATED_FN_PREFIX, "loop.body.");
 
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index 3fab2932e..4b4d003dd 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -5,10 +5,10 @@ use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::value::Value;
 use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
 
 pub struct SimplificationPass {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index d4762d239..fd399df9c 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -9,12 +9,12 @@ use compiler::num_bigint::BigInt;
 use code_producers::llvm_elements::array_switch::{get_array_load_symbol, get_array_store_symbol};
 use program_structure::constants::UsefulConstants;
 use crate::bucket_interpreter::env::Env;
-use crate::bucket_interpreter::R;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::operations::compute_operation;
+use crate::bucket_interpreter::R;
 use crate::bucket_interpreter::value::Value::{KnownU32, KnownBigInt};
 use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
 
 struct ZeroingInterpreter<'a> {
     pub constant_fields: &'a Vec<String>,

From aec62f7f08d8062fab6eead7a637f06d9c3e531d Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 12 Sep 2023 09:48:49 -0500
Subject: [PATCH 19/42] remove unused JoinSemiLattice

---
 circuit_passes/src/bucket_interpreter/env.rs  | 43 +------------------
 circuit_passes/src/bucket_interpreter/mod.rs  | 18 +-------
 .../src/bucket_interpreter/value.rs           | 16 -------
 3 files changed, 2 insertions(+), 75 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/env.rs b/circuit_passes/src/bucket_interpreter/env.rs
index f6915e82b..7de12efea 100644
--- a/circuit_passes/src/bucket_interpreter/env.rs
+++ b/circuit_passes/src/bucket_interpreter/env.rs
@@ -4,7 +4,7 @@ use std::fmt::{Display, Formatter};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use crate::bucket_interpreter::BucketInterpreter;
-use crate::bucket_interpreter::value::{JoinSemiLattice, Value};
+use crate::bucket_interpreter::value::Value;
 
 pub trait ContextSwitcher {
     fn switch<'a>(
@@ -19,24 +19,6 @@ pub trait LibraryAccess {
     fn get_template(&self, name: &String) -> Ref<TemplateCode>;
 }
 
-impl<L: JoinSemiLattice + Clone> JoinSemiLattice for HashMap<usize, L> {
-    fn join(&self, other: &Self) -> Self {
-        let mut new: HashMap<usize, L> = Default::default();
-        for (k, v) in self {
-            new.insert(*k, v.clone());
-        }
-
-        for (k, v) in other {
-            if new.contains_key(&k) {
-                new.get_mut(&k).unwrap().join(v);
-            } else {
-                new.insert(*k, v.clone());
-            }
-        }
-        new
-    }
-}
-
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct SubcmpEnv<'a> {
     pub signals: HashMap<usize, Value>,
@@ -45,19 +27,6 @@ pub struct SubcmpEnv<'a> {
     template_id: usize,
 }
 
-impl JoinSemiLattice for SubcmpEnv<'_> {
-    fn join(&self, other: &Self) -> Self {
-        assert_eq!(self.name, other.name);
-        assert_eq!(self.template_id, other.template_id);
-        SubcmpEnv {
-            signals: self.signals.join(&other.signals),
-            counter: std::cmp::min(self.counter, other.counter),
-            name: self.name,
-            template_id: self.template_id,
-        }
-    }
-}
-
 impl<'a> SubcmpEnv<'a> {
     pub fn new(inputs: usize, name: &'a String, template_id: usize) -> Self {
         SubcmpEnv { signals: Default::default(), counter: inputs, name, template_id }
@@ -268,14 +237,4 @@ impl<'a> Env<'a> {
         );
         r.0.expect("Function must return a value!")
     }
-
-    pub fn join(&self, other: &Self) -> Self {
-        Env {
-            vars: self.vars.join(&other.vars),
-            signals: self.signals.join(&other.signals),
-            subcmps: self.subcmps.join(&other.subcmps),
-            libs: self.libs,
-            context_switcher: self.context_switcher,
-        }
-    }
 }
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index ae92f4604..284e299b5 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -14,7 +14,7 @@ use program_structure::constants::UsefulConstants;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::operations::compute_offset;
-use crate::bucket_interpreter::value::{JoinSemiLattice, Value};
+use crate::bucket_interpreter::value::Value;
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
 use crate::passes::LOOP_BODY_FN_PREFIX;
 
@@ -27,22 +27,6 @@ pub struct BucketInterpreter<'a> {
 
 pub type R<'a> = (Option<Value>, Env<'a>);
 
-impl JoinSemiLattice for Option<Value> {
-    fn join(&self, other: &Self) -> Self {
-        match (self, other) {
-            (x, None) => x.clone(),
-            (None, x) => x.clone(),
-            (Some(x), Some(y)) => Some(x.join(y)),
-        }
-    }
-}
-
-impl JoinSemiLattice for R<'_> {
-    fn join(&self, other: &Self) -> Self {
-        (self.0.join(&other.0), self.1.join(&other.1))
-    }
-}
-
 impl<'a> BucketInterpreter<'a> {
     pub fn init(observer: &'a dyn InterpreterObserver, mem: &'a PassMemory, scope: String) -> Self {
         BucketInterpreter {
diff --git a/circuit_passes/src/bucket_interpreter/value.rs b/circuit_passes/src/bucket_interpreter/value.rs
index 954b32e0a..6228e89de 100644
--- a/circuit_passes/src/bucket_interpreter/value.rs
+++ b/circuit_passes/src/bucket_interpreter/value.rs
@@ -7,10 +7,6 @@ use circom_algebra::modular_arithmetic;
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
 
-pub trait JoinSemiLattice {
-    fn join(&self, other: &Self) -> Self;
-}
-
 /// Poor man's lattice that gives up the moment values are not equal
 /// It's a join semi lattice with a top (Unknown)
 /// Not a complete lattice because there is no bottom
@@ -41,18 +37,6 @@ impl Debug for Value {
     }
 }
 
-impl JoinSemiLattice for Value {
-    /// a ⊔ b = a    iff a = b
-    /// a ⊔ b = UNK  otherwise
-    fn join(&self, other: &Self) -> Self {
-        if self == other {
-            self.clone()
-        } else {
-            Unknown
-        }
-    }
-}
-
 impl Value {
     pub fn get_u32(&self) -> usize {
         match self {

From 5a235f86aff0bb84a48117bd62662bd06058e08f Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 13 Sep 2023 12:16:02 -0500
Subject: [PATCH 20/42] BROKEN: refactor Env and add different cases

---
 .../env/extracted_func_env.rs                 | 141 +++++++++
 .../src/bucket_interpreter/env/mod.rs         | 290 ++++++++++++++++++
 .../{env.rs => env/standard_env.rs}           | 144 ++-------
 .../env/unrolled_block_env.rs                 | 154 ++++++++++
 .../src/bucket_interpreter/memory.rs          |   4 +-
 circuit_passes/src/bucket_interpreter/mod.rs  |  77 +++--
 circuit_passes/src/passes/loop_unroll/mod.rs  |   5 +-
 circuit_passes/src/passes/mod.rs              |   2 +-
 8 files changed, 684 insertions(+), 133 deletions(-)
 create mode 100644 circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
 create mode 100644 circuit_passes/src/bucket_interpreter/env/mod.rs
 rename circuit_passes/src/bucket_interpreter/{env.rs => env/standard_env.rs} (57%)
 create mode 100644 circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs

diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
new file mode 100644
index 000000000..a39e28e1d
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -0,0 +1,141 @@
+use std::cell::Ref;
+use std::collections::HashMap;
+use std::fmt::{Display, Formatter, Result};
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use crate::bucket_interpreter::BucketInterpreter;
+use crate::bucket_interpreter::value::Value;
+use super::{Env, ContextSwitcher, LibraryAccess};
+
+/// This Env is used to process functions created when extracting loop bodies into
+/// `LOOP_BODY_FN_PREFIX` functions.
+#[derive(Clone)]
+pub struct ExtractedFuncEnvData<'a> {
+    base: Box<Env<'a>>,
+}
+
+impl Display for ExtractedFuncEnvData<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        self.base.fmt(f)
+    }
+}
+impl ContextSwitcher for ExtractedFuncEnvData<'_> {
+    fn switch<'a>(
+        &'a self,
+        interpreter: &'a BucketInterpreter<'a>,
+        scope: &String,
+    ) -> BucketInterpreter<'a> {
+        self.base.switch(interpreter, scope)
+    }
+}
+
+impl LibraryAccess for ExtractedFuncEnvData<'_> {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        self.base.get_function(name)
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        self.base.get_template(name)
+    }
+}
+
+impl<'a> ExtractedFuncEnvData<'a> {
+    pub fn new(inner: Env<'a>) -> Self {
+        ExtractedFuncEnvData { base: Box::new(inner) }
+    }
+
+    pub fn get_var(&self, idx: usize) -> Value {
+        println!("get_var({}) = {}", idx, self.base.get_var(idx));
+        self.base.get_var(idx)
+    }
+
+    pub fn get_signal(&self, idx: usize) -> Value {
+        println!("get_signal({}) = {}", idx, self.base.get_signal(idx));
+        self.base.get_signal(idx)
+    }
+
+    pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
+        //NOTE: `signal_idx` will always be 0 for the fixed* parameters
+        println!("TODO: must handle args here in addition to subcomps");
+        // self.base.get_subcmp_signal(subcmp_idx, signal_idx)
+        Value::Unknown
+    }
+
+    pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
+        todo!();
+        self.base.get_subcmp_name(subcmp_idx)
+    }
+
+    pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
+        todo!();
+        self.base.get_subcmp_template_id(subcmp_idx)
+    }
+
+    pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
+        todo!();
+        self.base.subcmp_counter_is_zero(subcmp_idx)
+    }
+
+    pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
+        todo!();
+        self.base.subcmp_counter_equal_to(subcmp_idx, value)
+    }
+
+    pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
+        todo!();
+        self.base.get_vars_clone()
+    }
+
+    pub fn set_var(self, idx: usize, value: Value) -> Self {
+        println!("set_var({}, {}), old = {}", idx, value, self.base.get_var(idx));
+        ExtractedFuncEnvData { base: Box::new(self.base.set_var(idx, value)) }
+    }
+
+    pub fn set_signal(self, idx: usize, value: Value) -> Self {
+        println!("set_signal({}, {}), old = {}", idx, value, self.base.get_signal(idx));
+        ExtractedFuncEnvData { base: Box::new(self.base.set_signal(idx, value)) }
+    }
+
+    pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
+        todo!();
+        ExtractedFuncEnvData { base: Box::new(self.base.set_subcmp_to_unk(subcmp_idx)) }
+    }
+
+    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
+        todo!();
+        ExtractedFuncEnvData {
+            base: Box::new(self.base.set_subcmp_signal(subcmp_idx, signal_idx, value)),
+        }
+    }
+
+    pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
+        todo!();
+        ExtractedFuncEnvData { base: Box::new(self.base.decrease_subcmp_counter(subcmp_idx)) }
+    }
+
+    pub fn run_subcmp(
+        self,
+        subcmp_idx: usize,
+        name: &String,
+        interpreter: &BucketInterpreter,
+        observe: bool,
+    ) -> Self {
+        todo!();
+        ExtractedFuncEnvData {
+            base: Box::new(self.base.run_subcmp(subcmp_idx, name, interpreter, observe)),
+        }
+    }
+
+    pub fn create_subcmp(
+        self,
+        name: &'a String,
+        base_index: usize,
+        count: usize,
+        template_id: usize,
+    ) -> Self {
+        todo!();
+        ExtractedFuncEnvData {
+            base: Box::new(self.base.create_subcmp(name, base_index, count, template_id)),
+        }
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
new file mode 100644
index 000000000..2a6feffb0
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -0,0 +1,290 @@
+use std::cell::Ref;
+use std::collections::HashMap;
+use std::fmt::{Display, Formatter, Result};
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use crate::bucket_interpreter::BucketInterpreter;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::loop_unroll::body_extractor::LoopBodyExtractor;
+use self::extracted_func_env::ExtractedFuncEnvData;
+use self::standard_env::StandardEnvData;
+use self::unrolled_block_env::UnrolledBlockEnvData;
+
+mod standard_env;
+mod unrolled_block_env;
+mod extracted_func_env;
+
+pub trait ContextSwitcher {
+    fn switch<'a>(
+        &'a self,
+        interpreter: &'a BucketInterpreter<'a>,
+        scope: &String,
+    ) -> BucketInterpreter<'a>;
+}
+
+pub trait LibraryAccess {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode>;
+    fn get_template(&self, name: &String) -> Ref<TemplateCode>;
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SubcmpEnv {
+    signals: HashMap<usize, Value>,
+    counter: usize,
+    name: String,
+    template_id: usize,
+}
+
+impl SubcmpEnv {
+    pub fn new(inputs: usize, name: &String, template_id: usize) -> Self {
+        SubcmpEnv { signals: Default::default(), counter: inputs, name: name.clone(), template_id }
+    }
+
+    pub fn reset(self) -> Self {
+        let mut copy = self;
+        copy.signals.clear();
+        copy
+    }
+
+    pub fn get_signal(&self, index: usize) -> Value {
+        self.signals.get(&index).unwrap_or_default().clone()
+    }
+
+    pub fn set_signal(self, idx: usize, value: Value) -> SubcmpEnv {
+        let mut copy = self;
+        copy.signals.insert(idx, value);
+        copy
+    }
+
+    pub fn counter_is_zero(&self) -> bool {
+        self.counter == 0
+    }
+
+    pub fn decrease_counter(self) -> SubcmpEnv {
+        let mut copy = self;
+        copy.counter -= 1;
+        copy
+    }
+
+    pub fn counter_equal_to(&self, value: usize) -> bool {
+        self.counter == value
+    }
+}
+
+// An immutable environment whose modification methods return a new object
+#[derive(Clone)]
+pub enum Env<'a> {
+    Standard(StandardEnvData<'a>),
+    UnrolledBlock(UnrolledBlockEnvData<'a>),
+    ExtractedFunction(ExtractedFuncEnvData<'a>),
+}
+
+impl Display for Env<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        match self {
+            Env::Standard(d) => d.fmt(f),
+            Env::UnrolledBlock(d) => d.fmt(f),
+            Env::ExtractedFunction(d) => d.fmt(f),
+        }
+    }
+}
+
+impl ContextSwitcher for Env<'_> {
+    fn switch<'a>(
+        &'a self,
+        interpreter: &'a BucketInterpreter<'a>,
+        scope: &String,
+    ) -> BucketInterpreter<'a> {
+        match self {
+            Env::Standard(d) => d.switch(interpreter, scope),
+            Env::UnrolledBlock(d) => d.switch(interpreter, scope),
+            Env::ExtractedFunction(d) => d.switch(interpreter, scope),
+        }
+    }
+}
+
+impl LibraryAccess for Env<'_> {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        match self {
+            Env::Standard(d) => d.get_function(name),
+            Env::UnrolledBlock(d) => d.get_function(name),
+            Env::ExtractedFunction(d) => d.get_function(name),
+        }
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        match self {
+            Env::Standard(d) => d.get_template(name),
+            Env::UnrolledBlock(d) => d.get_template(name),
+            Env::ExtractedFunction(d) => d.get_template(name),
+        }
+    }
+}
+
+impl<'a> Env<'a> {
+    pub fn new_standard_env(
+        libs: &'a dyn LibraryAccess,
+        context_switcher: &'a dyn ContextSwitcher,
+    ) -> Self {
+        Env::Standard(StandardEnvData::new(libs, context_switcher))
+    }
+
+    pub fn new_unroll_block_env(inner: Env<'a>, extractor: &'a LoopBodyExtractor) -> Self {
+        Env::UnrolledBlock(UnrolledBlockEnvData::new(inner, extractor))
+    }
+
+    pub fn new_extracted_func_env(inner: Env<'a>) -> Self {
+        Env::ExtractedFunction(ExtractedFuncEnvData::new(inner))
+    }
+
+    // READ OPERATIONS
+    pub fn get_var(&self, idx: usize) -> Value {
+        match self {
+            Env::Standard(d) => d.get_var(idx),
+            Env::UnrolledBlock(d) => d.get_var(idx),
+            Env::ExtractedFunction(d) => d.get_var(idx),
+        }
+    }
+
+    pub fn get_signal(&self, idx: usize) -> Value {
+        match self {
+            Env::Standard(d) => d.get_signal(idx),
+            Env::UnrolledBlock(d) => d.get_signal(idx),
+            Env::ExtractedFunction(d) => d.get_signal(idx),
+        }
+    }
+
+    pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
+        match self {
+            Env::Standard(d) => d.get_subcmp_signal(subcmp_idx, signal_idx),
+            Env::UnrolledBlock(d) => d.get_subcmp_signal(subcmp_idx, signal_idx),
+            Env::ExtractedFunction(d) => d.get_subcmp_signal(subcmp_idx, signal_idx),
+        }
+    }
+
+    pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
+        match self {
+            Env::Standard(d) => d.get_subcmp_name(subcmp_idx),
+            Env::UnrolledBlock(d) => d.get_subcmp_name(subcmp_idx),
+            Env::ExtractedFunction(d) => d.get_subcmp_name(subcmp_idx),
+        }
+    }
+
+    pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
+        match self {
+            Env::Standard(d) => d.get_subcmp_template_id(subcmp_idx),
+            Env::UnrolledBlock(d) => d.get_subcmp_template_id(subcmp_idx),
+            Env::ExtractedFunction(d) => d.get_subcmp_template_id(subcmp_idx),
+        }
+    }
+
+    pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
+        match self {
+            Env::Standard(d) => d.subcmp_counter_is_zero(subcmp_idx),
+            Env::UnrolledBlock(d) => d.subcmp_counter_is_zero(subcmp_idx),
+            Env::ExtractedFunction(d) => d.subcmp_counter_is_zero(subcmp_idx),
+        }
+    }
+
+    pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
+        match self {
+            Env::Standard(d) => d.subcmp_counter_equal_to(subcmp_idx, value),
+            Env::UnrolledBlock(d) => d.subcmp_counter_equal_to(subcmp_idx, value),
+            Env::ExtractedFunction(d) => d.subcmp_counter_equal_to(subcmp_idx, value),
+        }
+    }
+
+    pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
+        match self {
+            Env::Standard(d) => d.get_vars_clone(),
+            Env::UnrolledBlock(d) => d.get_vars_clone(),
+            Env::ExtractedFunction(d) => d.get_vars_clone(),
+        }
+    }
+
+    // WRITE OPERATIONS
+    pub fn set_var(self, idx: usize, value: Value) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_var(idx, value)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_var(idx, value)),
+            Env::ExtractedFunction(d) => Env::ExtractedFunction(d.set_var(idx, value)),
+        }
+    }
+
+    pub fn set_signal(self, idx: usize, value: Value) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_signal(idx, value)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_signal(idx, value)),
+            Env::ExtractedFunction(d) => Env::ExtractedFunction(d.set_signal(idx, value)),
+        }
+    }
+
+    /// Sets all the signals of the subcmp to UNK
+    pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_subcmp_to_unk(subcmp_idx)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_subcmp_to_unk(subcmp_idx)),
+            Env::ExtractedFunction(d) => Env::ExtractedFunction(d.set_subcmp_to_unk(subcmp_idx)),
+        }
+    }
+
+    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_subcmp_signal(subcmp_idx, signal_idx, value)),
+            Env::UnrolledBlock(d) => {
+                Env::UnrolledBlock(d.set_subcmp_signal(subcmp_idx, signal_idx, value))
+            }
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.set_subcmp_signal(subcmp_idx, signal_idx, value))
+            }
+        }
+    }
+
+    pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.decrease_subcmp_counter(subcmp_idx)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.decrease_subcmp_counter(subcmp_idx)),
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.decrease_subcmp_counter(subcmp_idx))
+            }
+        }
+    }
+
+    pub fn run_subcmp(
+        self,
+        subcmp_idx: usize,
+        name: &String,
+        interpreter: &BucketInterpreter,
+        observe: bool,
+    ) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.run_subcmp(subcmp_idx, name, interpreter, observe)),
+            Env::UnrolledBlock(d) => {
+                Env::UnrolledBlock(d.run_subcmp(subcmp_idx, name, interpreter, observe))
+            }
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.run_subcmp(subcmp_idx, name, interpreter, observe))
+            }
+        }
+    }
+
+    pub fn create_subcmp(
+        self,
+        name: &'a String,
+        base_index: usize,
+        count: usize,
+        template_id: usize,
+    ) -> Self {
+        match self {
+            Env::Standard(d) => {
+                Env::Standard(d.create_subcmp(name, base_index, count, template_id))
+            }
+            Env::UnrolledBlock(d) => {
+                Env::UnrolledBlock(d.create_subcmp(name, base_index, count, template_id))
+            }
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.create_subcmp(name, base_index, count, template_id))
+            }
+        }
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/env.rs b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
similarity index 57%
rename from circuit_passes/src/bucket_interpreter/env.rs
rename to circuit_passes/src/bucket_interpreter/env/standard_env.rs
index 7de12efea..dc3e5a05c 100644
--- a/circuit_passes/src/bucket_interpreter/env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
@@ -1,91 +1,54 @@
 use std::cell::Ref;
 use std::collections::HashMap;
-use std::fmt::{Display, Formatter};
+use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
+use super::{SubcmpEnv, ContextSwitcher, LibraryAccess};
 
-pub trait ContextSwitcher {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &'a String,
-    ) -> BucketInterpreter<'a>;
-}
-
-pub trait LibraryAccess {
-    fn get_function(&self, name: &String) -> Ref<FunctionCode>;
-    fn get_template(&self, name: &String) -> Ref<TemplateCode>;
-}
-
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct SubcmpEnv<'a> {
+#[derive(Clone)]
+pub struct StandardEnvData<'a> {
+    pub vars: HashMap<usize, Value>,
     pub signals: HashMap<usize, Value>,
-    counter: usize,
-    name: &'a String,
-    template_id: usize,
+    pub subcmps: HashMap<usize, SubcmpEnv>,
+    pub libs: &'a dyn LibraryAccess,
+    pub context_switcher: &'a dyn ContextSwitcher,
 }
 
-impl<'a> SubcmpEnv<'a> {
-    pub fn new(inputs: usize, name: &'a String, template_id: usize) -> Self {
-        SubcmpEnv { signals: Default::default(), counter: inputs, name, template_id }
-    }
-
-    pub fn reset(self) -> Self {
-        let mut copy = self;
-        copy.signals.clear();
-        copy
-    }
-
-    pub fn get_signal(&self, index: usize) -> Value {
-        self.signals.get(&index).unwrap_or_default().clone()
-    }
-
-    pub fn set_signal(self, idx: usize, value: Value) -> SubcmpEnv<'a> {
-        let mut copy = self;
-        copy.signals.insert(idx, value);
-        copy
-    }
-
-    pub fn counter_is_zero(&self) -> bool {
-        self.counter == 0
-    }
-
-    pub fn decrease_counter(self) -> SubcmpEnv<'a> {
-        let mut copy = self;
-        copy.counter -= 1;
-        copy
+impl Display for StandardEnvData<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        write!(
+            f,
+            "\n  vars = {:?}\n  signals = {:?}\n  subcmps = {:?}",
+            self.vars, self.signals, self.subcmps
+        )
     }
+}
 
-    pub fn counter_equal_to(&self, value: usize) -> bool {
-        self.counter == value
+impl ContextSwitcher for StandardEnvData<'_> {
+    fn switch<'a>(
+        &'a self,
+        interpreter: &'a BucketInterpreter<'a>,
+        scope: &String,
+    ) -> BucketInterpreter<'a> {
+        self.context_switcher.switch(interpreter, scope)
     }
 }
 
-// An immutable env that returns a new copy when modified
-#[derive(Clone)]
-pub struct Env<'a> {
-    vars: HashMap<usize, Value>,
-    signals: HashMap<usize, Value>,
-    subcmps: HashMap<usize, SubcmpEnv<'a>>,
-    libs: &'a dyn LibraryAccess,
-    context_switcher: &'a dyn ContextSwitcher,
-}
+impl LibraryAccess for StandardEnvData<'_> {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        self.libs.get_function(name)
+    }
 
-impl Display for Env<'_> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "\n  vars = {:?}\n  signals = {:?}\n  subcmps = {:?}",
-            self.vars, self.signals, self.subcmps
-        )
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        self.libs.get_template(name)
     }
 }
 
-impl<'a> Env<'a> {
+impl<'a> StandardEnvData<'a> {
     pub fn new(libs: &'a dyn LibraryAccess, context_switcher: &'a dyn ContextSwitcher) -> Self {
-        Env {
+        StandardEnvData {
             vars: Default::default(),
             signals: Default::default(),
             subcmps: Default::default(),
@@ -108,7 +71,7 @@ impl<'a> Env<'a> {
     }
 
     pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
-        self.subcmps[&subcmp_idx].name
+        &self.subcmps[&subcmp_idx].name
     }
 
     pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
@@ -140,20 +103,6 @@ impl<'a> Env<'a> {
         copy
     }
 
-    pub fn set_all_to_unk(self) -> Self {
-        let mut copy = self;
-        for (_, v) in copy.vars.iter_mut() {
-            *v = Value::Unknown;
-        }
-        for (_, v) in copy.signals.iter_mut() {
-            *v = Value::Unknown;
-        }
-        for (_, v) in copy.subcmps.iter_mut() {
-            v.signals.clear();
-        }
-        copy
-    }
-
     /// Sets all the signals of the subcmp to UNK
     pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
         let mut copy = self;
@@ -166,7 +115,6 @@ impl<'a> Env<'a> {
     }
 
     pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
-        //let subcmp = &self.subcmps[&subcmp_idx];
         let mut copy = self;
         let subcmp_env = copy
             .subcmps
@@ -201,40 +149,16 @@ impl<'a> Env<'a> {
 
     pub fn create_subcmp(
         self,
-        name: &'a String,
+        name: &String,
         base_index: usize,
         count: usize,
         template_id: usize,
     ) -> Self {
-        let number_of_inputs = self.libs.get_template(name).number_of_inputs;
+        let number_of_inputs = self.get_template(name).number_of_inputs;
         let mut copy = self;
         for i in base_index..(base_index + count) {
             copy.subcmps.insert(i, SubcmpEnv::new(number_of_inputs, name, template_id));
         }
         copy
     }
-
-    pub fn run_function(
-        &self,
-        name: &String,
-        interpreter: &BucketInterpreter,
-        args: Vec<Value>,
-        observe: bool,
-    ) -> Value {
-        if cfg!(debug_assertions) {
-            println!("Running function {}", name);
-        }
-        let code = &self.libs.get_function(name).body;
-        let mut function_env = Env::new(self.libs, self.context_switcher);
-        for (id, arg) in args.iter().enumerate() {
-            function_env = function_env.set_var(id, arg.clone());
-        }
-        let interpreter = self.context_switcher.switch(interpreter, name);
-        let r = interpreter.execute_instructions(
-            code,
-            function_env,
-            !interpreter.observer.ignore_function_calls() && observe,
-        );
-        r.0.expect("Function must return a value!")
-    }
 }
diff --git a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
new file mode 100644
index 000000000..7614a2d22
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
@@ -0,0 +1,154 @@
+use std::cell::Ref;
+use std::collections::HashMap;
+use std::fmt::{Display, Formatter, Result};
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+
+use crate::bucket_interpreter::BucketInterpreter;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::LOOP_BODY_FN_PREFIX;
+use crate::passes::loop_unroll::body_extractor::LoopBodyExtractor;
+use super::{Env, ContextSwitcher, LibraryAccess};
+
+/// This Env is used by the loop unroller to process the BlockBucket containing a
+/// unrolled loop specifically handling the case where the LibraryAccess does not
+/// contain the functions generated to hold the extracted loop bodies. It instead
+/// uses the temporary list in the LoopBodyExtractor to get those function bodies.
+#[derive(Clone)]
+pub struct UnrolledBlockEnvData<'a> {
+    base: Box<Env<'a>>,
+    extractor: &'a LoopBodyExtractor,
+}
+
+impl Display for UnrolledBlockEnvData<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        self.base.fmt(f)
+    }
+}
+
+impl ContextSwitcher for UnrolledBlockEnvData<'_> {
+    fn switch<'a>(
+        &'a self,
+        interpreter: &'a BucketInterpreter<'a>,
+        scope: &String,
+    ) -> BucketInterpreter<'a> {
+        self.base.switch(interpreter, scope)
+    }
+}
+
+impl LibraryAccess for UnrolledBlockEnvData<'_> {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        if name.starts_with(LOOP_BODY_FN_PREFIX) {
+            Ref::map(self.extractor.get_new_functions(), |f| {
+                f.iter()
+                    .find(|f| f.name.eq(name))
+                    .expect("Cannot find extracted function definition!")
+            })
+        } else {
+            self.base.get_function(name)
+        }
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        self.base.get_template(name)
+    }
+}
+
+impl<'a> UnrolledBlockEnvData<'a> {
+    pub fn new(base: Env<'a>, extractor: &'a LoopBodyExtractor) -> Self {
+        UnrolledBlockEnvData { base: Box::new(base), extractor }
+    }
+
+    pub fn get_var(&self, idx: usize) -> Value {
+        self.base.get_var(idx)
+    }
+
+    pub fn get_signal(&self, idx: usize) -> Value {
+        self.base.get_signal(idx)
+    }
+
+    pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
+        self.base.get_subcmp_signal(subcmp_idx, signal_idx)
+    }
+
+    pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
+        self.base.get_subcmp_name(subcmp_idx)
+    }
+
+    pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
+        self.base.get_subcmp_template_id(subcmp_idx)
+    }
+
+    pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
+        self.base.subcmp_counter_is_zero(subcmp_idx)
+    }
+
+    pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
+        self.base.subcmp_counter_equal_to(subcmp_idx, value)
+    }
+
+    pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
+        self.base.get_vars_clone()
+    }
+
+    pub fn set_var(self, idx: usize, value: Value) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.set_var(idx, value)),
+            extractor: self.extractor,
+        }
+    }
+
+    pub fn set_signal(self, idx: usize, value: Value) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.set_signal(idx, value)),
+            extractor: self.extractor,
+        }
+    }
+
+    pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.set_subcmp_to_unk(subcmp_idx)),
+            extractor: self.extractor,
+        }
+    }
+
+    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.set_subcmp_signal(subcmp_idx, signal_idx, value)),
+            extractor: self.extractor,
+        }
+    }
+
+    pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.decrease_subcmp_counter(subcmp_idx)),
+            extractor: self.extractor,
+        }
+    }
+
+    pub fn run_subcmp(
+        self,
+        subcmp_idx: usize,
+        name: &String,
+        interpreter: &BucketInterpreter,
+        observe: bool,
+    ) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.run_subcmp(subcmp_idx, name, interpreter, observe)),
+            extractor: self.extractor,
+        }
+    }
+
+    pub fn create_subcmp(
+        self,
+        name: &'a String,
+        base_index: usize,
+        count: usize,
+        template_id: usize,
+    ) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.create_subcmp(name, base_index, count, template_id)),
+            extractor: self.extractor,
+        }
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/memory.rs b/circuit_passes/src/bucket_interpreter/memory.rs
index f20872a6e..3bc77d7b6 100644
--- a/circuit_passes/src/bucket_interpreter/memory.rs
+++ b/circuit_passes/src/bucket_interpreter/memory.rs
@@ -51,7 +51,7 @@ impl PassMemory {
             println!("Running template {}", self.current_scope.borrow());
         }
         let interpreter = self.build_interpreter(observer);
-        let env = Env::new(self, self);
+        let env = Env::new_standard_env(self, self);
         interpreter.execute_instructions(&template.body, env, true);
     }
 
@@ -146,7 +146,7 @@ impl ContextSwitcher for PassMemory {
     fn switch<'a>(
         &'a self,
         interpreter: &'a BucketInterpreter<'a>,
-        scope: &'a String,
+        scope: &String,
     ) -> BucketInterpreter<'a> {
         self.build_interpreter_with_scope(interpreter.observer, scope.clone())
     }
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 284e299b5..9e4dc96dc 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -4,6 +4,8 @@ pub mod memory;
 pub mod observer;
 pub(crate) mod operations;
 
+use std::cell::Ref;
+use std::vec;
 use circom_algebra::modular_arithmetic;
 use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
 use compiler::intermediate_representation::{Instruction, InstructionList, InstructionPointer};
@@ -17,6 +19,7 @@ use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::Value;
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
 use crate::passes::LOOP_BODY_FN_PREFIX;
+use self::env::{LibraryAccess, ContextSwitcher};
 
 pub struct BucketInterpreter<'a> {
     pub(crate) observer: &'a dyn InterpreterObserver,
@@ -225,12 +228,10 @@ impl<'a> BucketInterpreter<'a> {
                 }
             }
             AddressType::SubcmpSignal { cmp_address, .. } => {
+                println!("Load SubcmpSignal: {}", cmp_address.to_string());
                 let (addr, env) = self.execute_instruction(cmp_address, env, observe);
-                let addr = addr
-                    .expect(
-                        "cmp_address instruction in StoreBucket SubcmpSignal must produce a value!",
-                    )
-                    .get_u32();
+                let addr =
+                    addr.expect("cmp_address in SubcmpSignal must produce a value!").get_u32();
                 let continue_observing =
                     if observe { self.observer.on_location_rule(&bucket.src, &env) } else { false };
                 let (idx, env) = match &bucket.src {
@@ -409,6 +410,45 @@ impl<'a> BucketInterpreter<'a> {
         (computed_value, env)
     }
 
+    fn run_function<'env>(
+        &self,
+        env: Env<'env>,
+        name: &String,
+        interpreter: &BucketInterpreter,
+        args: Vec<Value>,
+        observe: bool,
+    ) -> R<'env> {
+        if cfg!(debug_assertions) {
+            println!("Running function {}", name);
+        }
+        let mut new_env = if name.starts_with(LOOP_BODY_FN_PREFIX) {
+            Env::new_extracted_func_env(env)
+        } else {
+            Env::new_standard_env(self.mem, self.mem)
+        };
+        for (id, arg) in args.iter().enumerate() {
+            new_env = new_env.set_var(id, arg.clone());
+        }
+
+        // let instructions = &func.body;
+        let instructions = Ref::map(self.mem.get_function(name), |f| &f.body);
+        let interp = self.mem.switch(interpreter, name);
+        // let temp = interp.execute_instructions(
+        //     &instructions,
+        //     new_env,
+        //     observe && !interp.observer.ignore_function_calls(),
+        // );
+        // temp
+        let observe = observe && !interp.observer.ignore_function_calls();
+        let mut last = (None, new_env);
+        for inst in instructions.iter() {
+            last = self.execute_instruction(inst, last.1, observe);
+        }
+        last //TODO: how can I make this work!?
+             // (last.0, last.1.clone()) // or this?
+             //                          // (last.0, env.clone())
+    }
+
     pub fn execute_call_bucket<'env>(
         &self,
         bucket: &'env CallBucket,
@@ -416,14 +456,13 @@ impl<'a> BucketInterpreter<'a> {
         observe: bool,
     ) -> R<'env> {
         let mut env = env;
-        let res = if bucket.symbol.starts_with(LOOP_BODY_FN_PREFIX) {
-            // The extracted loop body functions can change any values in the environment via the
-            //  parameters passed to it. For now, use the naive approach of setting everything to
-            //  Unknown. This could be improved with special handling for these types of functions.
-            env = env.set_all_to_unk();
-            Unknown
-        } else if bucket.symbol.eq(FR_IDENTITY_ARR_PTR) || bucket.symbol.eq(FR_INDEX_ARR_PTR) {
-            Unknown
+        let res = if bucket.symbol.eq(FR_IDENTITY_ARR_PTR) || bucket.symbol.eq(FR_INDEX_ARR_PTR) {
+            (Some(Unknown), env)
+        } else if bucket.symbol.starts_with(LOOP_BODY_FN_PREFIX) {
+            // The extracted loop body functions can change any values in the environment
+            //  via the parameters passed to it. So interpret the function and keep the
+            //  resulting Env (as if the function had executed inline).
+            self.run_function(env, &bucket.symbol, self, vec![], observe)
         } else {
             let mut args = vec![];
             for i in &bucket.arguments {
@@ -432,22 +471,24 @@ impl<'a> BucketInterpreter<'a> {
                 args.push(value.expect("Function argument must produce a value!"));
             }
             if args.iter().any(|v| v.is_unknown()) {
-                Unknown
+                (Some(Unknown), env)
             } else {
-                env.run_function(&bucket.symbol, self, args, observe)
+                // Ignore the resulting Env from the callee function, using the one in the current caller
+                let (v, _) = self.run_function(env.clone(), &bucket.symbol, self, args, observe);
+                (v, env)
             }
         };
 
         // Write the result in the destination according to the ReturnType
         match &bucket.return_info {
-            ReturnType::Intermediate { .. } => (Some(res), env),
+            ReturnType::Intermediate { .. } => res,
             ReturnType::Final(final_data) => (
                 None,
                 self.store_value_in_address(
                     &final_data.dest_address_type,
                     &final_data.dest,
-                    res,
-                    env,
+                    res.0.expect("Function must return a value!"),
+                    res.1,
                     observe,
                 ),
             ),
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index e7fab158a..b3c136dc4 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -1,6 +1,6 @@
 mod loop_env_recorder;
 mod extracted_location_updater;
-mod body_extractor;
+pub mod body_extractor;
 
 use std::cell::RefCell;
 use std::collections::BTreeMap;
@@ -120,7 +120,8 @@ impl LoopUnrollPass {
     fn continue_inside(&self, bucket: &BlockBucket, env: &Env) {
         println!("\ncontinue_inside {:?} with {} ", bucket, env);
         let interpreter = self.memory.build_interpreter(self);
-        interpreter.execute_block_bucket(bucket, env.clone(), true);
+        let env = Env::new_unroll_block_env(env.clone(), &self.extractor);
+        interpreter.execute_block_bucket(bucket, env, true);
     }
 }
 
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index bbe9c107e..d28b3a8cf 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -14,12 +14,12 @@ use crate::passes::{
 use crate::passes::checks::assert_unique_ids_in_circuit;
 
 mod conditional_flattening;
-mod loop_unroll;
 mod simplification;
 mod deterministic_subcomponent_invocation;
 mod mapped_to_indexed;
 mod unknown_index_sanitization;
 mod checks;
+pub mod loop_unroll;
 
 pub const LOOP_BODY_FN_PREFIX: &str = const_format::concatcp!(GENERATED_FN_PREFIX, "loop.body.");
 

From ce54f80b958612beba1b073a9980e7769f311883 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 13 Sep 2023 13:46:26 -0500
Subject: [PATCH 21/42] minor refactoring in bucket interpreter

---
 circuit_passes/src/bucket_interpreter/mod.rs | 33 ++++++++++----------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 9e4dc96dc..9325382d3 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -414,7 +414,6 @@ impl<'a> BucketInterpreter<'a> {
         &self,
         env: Env<'env>,
         name: &String,
-        interpreter: &BucketInterpreter,
         args: Vec<Value>,
         observe: bool,
     ) -> R<'env> {
@@ -424,29 +423,27 @@ impl<'a> BucketInterpreter<'a> {
         let mut new_env = if name.starts_with(LOOP_BODY_FN_PREFIX) {
             Env::new_extracted_func_env(env)
         } else {
+            //TODO: this passes lifetime 'a references into the Env which could be the one
+            //  returned which is why the lifetime 'a must outlife the return lifetime 'env
             Env::new_standard_env(self.mem, self.mem)
+            // todo!()
         };
         for (id, arg) in args.iter().enumerate() {
             new_env = new_env.set_var(id, arg.clone());
         }
 
-        // let instructions = &func.body;
-        let instructions = Ref::map(self.mem.get_function(name), |f| &f.body);
-        let interp = self.mem.switch(interpreter, name);
-        // let temp = interp.execute_instructions(
-        //     &instructions,
-        //     new_env,
-        //     observe && !interp.observer.ignore_function_calls(),
-        // );
-        // temp
+        let instructions = &self.mem.get_function(name).body;
+        let interp = self.mem.switch(self, name);
         let observe = observe && !interp.observer.ignore_function_calls();
         let mut last = (None, new_env);
-        for inst in instructions.iter() {
-            last = self.execute_instruction(inst, last.1, observe);
+        unsafe {
+            let ptr = instructions.as_ptr();
+            for i in 0..instructions.len() {
+                let inst = ptr.add(i).as_ref().unwrap();
+                last = interp.execute_instruction(inst, last.1, observe);
+            }
         }
-        last //TODO: how can I make this work!?
-             // (last.0, last.1.clone()) // or this?
-             //                          // (last.0, env.clone())
+        last
     }
 
     pub fn execute_call_bucket<'env>(
@@ -456,13 +453,14 @@ impl<'a> BucketInterpreter<'a> {
         observe: bool,
     ) -> R<'env> {
         let mut env = env;
+        // let res = (Some(Unknown), env);
         let res = if bucket.symbol.eq(FR_IDENTITY_ARR_PTR) || bucket.symbol.eq(FR_INDEX_ARR_PTR) {
             (Some(Unknown), env)
         } else if bucket.symbol.starts_with(LOOP_BODY_FN_PREFIX) {
             // The extracted loop body functions can change any values in the environment
             //  via the parameters passed to it. So interpret the function and keep the
             //  resulting Env (as if the function had executed inline).
-            self.run_function(env, &bucket.symbol, self, vec![], observe)
+            self.run_function(env, &bucket.symbol, vec![], observe)
         } else {
             let mut args = vec![];
             for i in &bucket.arguments {
@@ -474,7 +472,8 @@ impl<'a> BucketInterpreter<'a> {
                 (Some(Unknown), env)
             } else {
                 // Ignore the resulting Env from the callee function, using the one in the current caller
-                let (v, _) = self.run_function(env.clone(), &bucket.symbol, self, args, observe);
+                let (v, _) = self.run_function(env.clone(), &bucket.symbol, args, observe);
+                v.as_ref().expect("Function argument must produce a value!");
                 (v, env)
             }
         };

From 89455e520aeffe7247ce66b987d3d55d81f65ea5 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 13 Sep 2023 14:28:34 -0500
Subject: [PATCH 22/42] fix compile, refactor ContextSwitcher

---
 .../env/extracted_func_env.rs                 | 12 +---
 .../src/bucket_interpreter/env/mod.rs         | 29 +--------
 .../bucket_interpreter/env/standard_env.rs    | 16 +----
 .../env/unrolled_block_env.rs                 | 12 +---
 .../src/bucket_interpreter/memory.rs          | 43 +++++--------
 circuit_passes/src/bucket_interpreter/mod.rs  | 64 +++++++++----------
 6 files changed, 53 insertions(+), 123 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index a39e28e1d..759dfce34 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -5,7 +5,7 @@ use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
-use super::{Env, ContextSwitcher, LibraryAccess};
+use super::{Env, LibraryAccess};
 
 /// This Env is used to process functions created when extracting loop bodies into
 /// `LOOP_BODY_FN_PREFIX` functions.
@@ -19,15 +19,6 @@ impl Display for ExtractedFuncEnvData<'_> {
         self.base.fmt(f)
     }
 }
-impl ContextSwitcher for ExtractedFuncEnvData<'_> {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &String,
-    ) -> BucketInterpreter<'a> {
-        self.base.switch(interpreter, scope)
-    }
-}
 
 impl LibraryAccess for ExtractedFuncEnvData<'_> {
     fn get_function(&self, name: &String) -> Ref<FunctionCode> {
@@ -56,6 +47,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
 
     pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
         //NOTE: `signal_idx` will always be 0 for the fixed* parameters
+        assert_eq!(signal_idx, 0);
         println!("TODO: must handle args here in addition to subcomps");
         // self.base.get_subcmp_signal(subcmp_idx, signal_idx)
         Value::Unknown
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
index 2a6feffb0..4ac255b53 100644
--- a/circuit_passes/src/bucket_interpreter/env/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -14,14 +14,6 @@ mod standard_env;
 mod unrolled_block_env;
 mod extracted_func_env;
 
-pub trait ContextSwitcher {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &String,
-    ) -> BucketInterpreter<'a>;
-}
-
 pub trait LibraryAccess {
     fn get_function(&self, name: &String) -> Ref<FunctionCode>;
     fn get_template(&self, name: &String) -> Ref<TemplateCode>;
@@ -89,20 +81,6 @@ impl Display for Env<'_> {
     }
 }
 
-impl ContextSwitcher for Env<'_> {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &String,
-    ) -> BucketInterpreter<'a> {
-        match self {
-            Env::Standard(d) => d.switch(interpreter, scope),
-            Env::UnrolledBlock(d) => d.switch(interpreter, scope),
-            Env::ExtractedFunction(d) => d.switch(interpreter, scope),
-        }
-    }
-}
-
 impl LibraryAccess for Env<'_> {
     fn get_function(&self, name: &String) -> Ref<FunctionCode> {
         match self {
@@ -122,11 +100,8 @@ impl LibraryAccess for Env<'_> {
 }
 
 impl<'a> Env<'a> {
-    pub fn new_standard_env(
-        libs: &'a dyn LibraryAccess,
-        context_switcher: &'a dyn ContextSwitcher,
-    ) -> Self {
-        Env::Standard(StandardEnvData::new(libs, context_switcher))
+    pub fn new_standard_env(libs: &'a dyn LibraryAccess) -> Self {
+        Env::Standard(StandardEnvData::new(libs))
     }
 
     pub fn new_unroll_block_env(inner: Env<'a>, extractor: &'a LoopBodyExtractor) -> Self {
diff --git a/circuit_passes/src/bucket_interpreter/env/standard_env.rs b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
index dc3e5a05c..d79589234 100644
--- a/circuit_passes/src/bucket_interpreter/env/standard_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
@@ -5,7 +5,7 @@ use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
-use super::{SubcmpEnv, ContextSwitcher, LibraryAccess};
+use super::{SubcmpEnv, LibraryAccess};
 
 #[derive(Clone)]
 pub struct StandardEnvData<'a> {
@@ -13,7 +13,6 @@ pub struct StandardEnvData<'a> {
     pub signals: HashMap<usize, Value>,
     pub subcmps: HashMap<usize, SubcmpEnv>,
     pub libs: &'a dyn LibraryAccess,
-    pub context_switcher: &'a dyn ContextSwitcher,
 }
 
 impl Display for StandardEnvData<'_> {
@@ -26,16 +25,6 @@ impl Display for StandardEnvData<'_> {
     }
 }
 
-impl ContextSwitcher for StandardEnvData<'_> {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &String,
-    ) -> BucketInterpreter<'a> {
-        self.context_switcher.switch(interpreter, scope)
-    }
-}
-
 impl LibraryAccess for StandardEnvData<'_> {
     fn get_function(&self, name: &String) -> Ref<FunctionCode> {
         self.libs.get_function(name)
@@ -47,13 +36,12 @@ impl LibraryAccess for StandardEnvData<'_> {
 }
 
 impl<'a> StandardEnvData<'a> {
-    pub fn new(libs: &'a dyn LibraryAccess, context_switcher: &'a dyn ContextSwitcher) -> Self {
+    pub fn new(libs: &'a dyn LibraryAccess) -> Self {
         StandardEnvData {
             vars: Default::default(),
             signals: Default::default(),
             subcmps: Default::default(),
             libs,
-            context_switcher,
         }
     }
 
diff --git a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
index 7614a2d22..f530b9bc6 100644
--- a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
@@ -8,7 +8,7 @@ use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
 use crate::passes::LOOP_BODY_FN_PREFIX;
 use crate::passes::loop_unroll::body_extractor::LoopBodyExtractor;
-use super::{Env, ContextSwitcher, LibraryAccess};
+use super::{Env, LibraryAccess};
 
 /// This Env is used by the loop unroller to process the BlockBucket containing a
 /// unrolled loop specifically handling the case where the LibraryAccess does not
@@ -26,16 +26,6 @@ impl Display for UnrolledBlockEnvData<'_> {
     }
 }
 
-impl ContextSwitcher for UnrolledBlockEnvData<'_> {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &String,
-    ) -> BucketInterpreter<'a> {
-        self.base.switch(interpreter, scope)
-    }
-}
-
 impl LibraryAccess for UnrolledBlockEnvData<'_> {
     fn get_function(&self, name: &String) -> Ref<FunctionCode> {
         if name.starts_with(LOOP_BODY_FN_PREFIX) {
diff --git a/circuit_passes/src/bucket_interpreter/memory.rs b/circuit_passes/src/bucket_interpreter/memory.rs
index 3bc77d7b6..61b08ffbc 100644
--- a/circuit_passes/src/bucket_interpreter/memory.rs
+++ b/circuit_passes/src/bucket_interpreter/memory.rs
@@ -7,8 +7,7 @@ use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use crate::bucket_interpreter::BucketInterpreter;
-use crate::bucket_interpreter::env::{ContextSwitcher, LibraryAccess};
-use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::env::{Env, LibraryAccess};
 use crate::bucket_interpreter::observer::InterpreterObserver;
 
 pub struct PassMemory {
@@ -41,20 +40,6 @@ impl PassMemory {
         }
     }
 
-    pub fn set_scope(&self, template: &TemplateCode) {
-        self.current_scope.replace(template.header.clone());
-    }
-
-    pub fn run_template(&self, observer: &dyn InterpreterObserver, template: &TemplateCode) {
-        assert!(!self.current_scope.borrow().is_empty());
-        if cfg!(debug_assertions) {
-            println!("Running template {}", self.current_scope.borrow());
-        }
-        let interpreter = self.build_interpreter(observer);
-        let env = Env::new_standard_env(self, self);
-        interpreter.execute_instructions(&template.body, env, true);
-    }
-
     pub fn build_interpreter<'a>(
         &'a self,
         observer: &'a dyn InterpreterObserver,
@@ -62,7 +47,7 @@ impl PassMemory {
         self.build_interpreter_with_scope(observer, self.current_scope.borrow().to_string())
     }
 
-    fn build_interpreter_with_scope<'a>(
+    pub fn build_interpreter_with_scope<'a>(
         &'a self,
         observer: &'a dyn InterpreterObserver,
         scope: String,
@@ -70,6 +55,20 @@ impl PassMemory {
         BucketInterpreter::init(observer, self, scope)
     }
 
+    pub fn set_scope(&self, template: &TemplateCode) {
+        self.current_scope.replace(template.header.clone());
+    }
+
+    pub fn run_template(&self, observer: &dyn InterpreterObserver, template: &TemplateCode) {
+        assert!(!self.current_scope.borrow().is_empty());
+        if cfg!(debug_assertions) {
+            println!("Running template {}", self.current_scope.borrow());
+        }
+        let interpreter = self.build_interpreter(observer);
+        let env = Env::new_standard_env(self);
+        interpreter.execute_instructions(&template.body, env, true);
+    }
+
     pub fn add_template(&self, template: &TemplateCode) {
         self.templates_library.borrow_mut().insert(template.header.clone(), (*template).clone());
     }
@@ -142,16 +141,6 @@ impl PassMemory {
     }
 }
 
-impl ContextSwitcher for PassMemory {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &String,
-    ) -> BucketInterpreter<'a> {
-        self.build_interpreter_with_scope(interpreter.observer, scope.clone())
-    }
-}
-
 impl LibraryAccess for PassMemory {
     fn get_function(&self, name: &String) -> Ref<FunctionCode> {
         Ref::map(self.functions_library.borrow(), |map| &map[name])
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 9325382d3..db1b17b0a 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -4,7 +4,6 @@ pub mod memory;
 pub mod observer;
 pub(crate) mod operations;
 
-use std::cell::Ref;
 use std::vec;
 use circom_algebra::modular_arithmetic;
 use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
@@ -19,7 +18,7 @@ use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::Value;
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
 use crate::passes::LOOP_BODY_FN_PREFIX;
-use self::env::{LibraryAccess, ContextSwitcher};
+use self::env::LibraryAccess;
 
 pub struct BucketInterpreter<'a> {
     pub(crate) observer: &'a dyn InterpreterObserver,
@@ -410,40 +409,39 @@ impl<'a> BucketInterpreter<'a> {
         (computed_value, env)
     }
 
-    fn run_function<'env>(
-        &self,
-        env: Env<'env>,
-        name: &String,
-        args: Vec<Value>,
-        observe: bool,
-    ) -> R<'env> {
+    fn run_function_loopbody<'env>(&self, name: &String, env: Env<'env>, observe: bool) -> R<'env> {
         if cfg!(debug_assertions) {
             println!("Running function {}", name);
         }
-        let mut new_env = if name.starts_with(LOOP_BODY_FN_PREFIX) {
-            Env::new_extracted_func_env(env)
-        } else {
-            //TODO: this passes lifetime 'a references into the Env which could be the one
-            //  returned which is why the lifetime 'a must outlife the return lifetime 'env
-            Env::new_standard_env(self.mem, self.mem)
-            // todo!()
-        };
-        for (id, arg) in args.iter().enumerate() {
-            new_env = new_env.set_var(id, arg.clone());
-        }
-
-        let instructions = &self.mem.get_function(name).body;
-        let interp = self.mem.switch(self, name);
+        let mut res = (None, Env::new_extracted_func_env(env.clone()));
+        let interp = self.mem.build_interpreter_with_scope(self.observer, name.clone());
         let observe = observe && !interp.observer.ignore_function_calls();
-        let mut last = (None, new_env);
+        let instructions = &env.get_function(name).body;
         unsafe {
             let ptr = instructions.as_ptr();
             for i in 0..instructions.len() {
                 let inst = ptr.add(i).as_ref().unwrap();
-                last = interp.execute_instruction(inst, last.1, observe);
+                res = interp.execute_instruction(inst, res.1, observe);
             }
         }
-        last
+        res
+    }
+
+    fn run_function_basic<'env>(&self, name: &String, args: Vec<Value>, observe: bool) -> Value {
+        if cfg!(debug_assertions) {
+            println!("Running function {}", name);
+        }
+        let mut new_env = Env::new_standard_env(self.mem);
+        for (id, arg) in args.iter().enumerate() {
+            new_env = new_env.set_var(id, arg.clone());
+        }
+        let interp = self.mem.build_interpreter_with_scope(self.observer, name.clone());
+        let (v, _) = interp.execute_instructions(
+            &self.mem.get_function(name).body,
+            new_env,
+            observe && !interp.observer.ignore_function_calls(),
+        );
+        v.expect("Function must produce a value!")
     }
 
     pub fn execute_call_bucket<'env>(
@@ -460,7 +458,7 @@ impl<'a> BucketInterpreter<'a> {
             // The extracted loop body functions can change any values in the environment
             //  via the parameters passed to it. So interpret the function and keep the
             //  resulting Env (as if the function had executed inline).
-            self.run_function(env, &bucket.symbol, vec![], observe)
+            self.run_function_loopbody(&bucket.symbol, env, observe)
         } else {
             let mut args = vec![];
             for i in &bucket.arguments {
@@ -468,14 +466,12 @@ impl<'a> BucketInterpreter<'a> {
                 env = new_env;
                 args.push(value.expect("Function argument must produce a value!"));
             }
-            if args.iter().any(|v| v.is_unknown()) {
-                (Some(Unknown), env)
+            let v = if args.iter().any(|v| v.is_unknown()) {
+                Unknown
             } else {
-                // Ignore the resulting Env from the callee function, using the one in the current caller
-                let (v, _) = self.run_function(env.clone(), &bucket.symbol, args, observe);
-                v.as_ref().expect("Function argument must produce a value!");
-                (v, env)
-            }
+                self.run_function_basic(&bucket.symbol, args, observe)
+            };
+            (Some(v), env)
         };
 
         // Write the result in the destination according to the ReturnType

From 92324253ffcc41a6dfee82297d79903d2a3ab009 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 13 Sep 2023 19:50:27 -0500
Subject: [PATCH 23/42] simplify type for fixed variant parameters

---
 circom/tests/loops/inner_loop_simple.circom   |  78 +++++-----
 circom/tests/loops/inner_loops2.circom        | 136 ++++++++----------
 circom/tests/loops/inner_loops3.circom        | 121 +++++++---------
 circom/tests/loops/inner_loops6.circom        |  52 ++++---
 circom/tests/loops/simple_variant_idx.circom  |  32 ++---
 circom/tests/loops/vanguard-uc-comp.circom    |  46 +++---
 .../tests/loops/variant_idx_in_loop_A.circom  |  14 +-
 .../tests/loops/variant_idx_in_loop_B.circom  |  26 ++--
 .../env/extracted_func_env.rs                 |   5 +
 .../src/bucket_interpreter/env/mod.rs         |   8 ++
 .../bucket_interpreter/env/standard_env.rs    |  14 ++
 .../env/unrolled_block_env.rs                 |   7 +
 circuit_passes/src/bucket_interpreter/mod.rs  |   7 +-
 .../src/passes/loop_unroll/body_extractor.rs  |   6 +-
 code_producers/src/llvm_elements/fr.rs        |   6 +-
 compiler/src/circuit_design/circuit.rs        |   3 +
 .../load_bucket.rs                            |   7 +-
 .../store_bucket.rs                           |   7 +-
 18 files changed, 286 insertions(+), 289 deletions(-)

diff --git a/circom/tests/loops/inner_loop_simple.circom b/circom/tests/loops/inner_loop_simple.circom
index d06b4fefd..01f7640b2 100644
--- a/circom/tests/loops/inner_loop_simple.circom
+++ b/circom/tests/loops/inner_loop_simple.circom
@@ -21,7 +21,7 @@ component main = InnerLoops(2, 3);
 // %lvars = { n, m, b[0], b[1], i, j }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -31,7 +31,7 @@ component main = InnerLoops(2, 3);
 //CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
 //CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
 //CHECK-NEXT:   %add_addr = add i32 %mul_addr, 2
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
 //CHECK-NEXT:   store i256 %3, i256* %4, align 4
@@ -50,7 +50,7 @@ component main = InnerLoops(2, 3);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -60,7 +60,7 @@ component main = InnerLoops(2, 3);
 //CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
 //CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
 //CHECK-NEXT:   %add_addr = add i32 %mul_addr, 2
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
 //CHECK-NEXT:   store i256 %3, i256* %4, align 4
@@ -84,47 +84,41 @@ component main = InnerLoops(2, 3);
 //CHECK-NEXT:   store i256 0, i256* %6, align 4
 //CHECK-NEXT:   %7 = bitcast [6 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %9 = bitcast i256* %8 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, [0 x i256]* %9)
-//CHECK-NEXT:   %10 = bitcast [6 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %12 = bitcast i256* %11 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0, [0 x i256]* %12)
-//CHECK-NEXT:   %13 = bitcast [6 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %15 = bitcast i256* %14 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %13, [0 x i256]* %0, [0 x i256]* %15)
-//CHECK-NEXT:   %16 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   %17 = load i256, i256* %16, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %17, i256 1)
-//CHECK-NEXT:   %18 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %18, align 4
-//CHECK-NEXT:   %19 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 0, i256* %19, align 4
-//CHECK-NEXT:   %20 = bitcast [6 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %20, [0 x i256]* %0, [0 x i256]* %22)
-//CHECK-NEXT:   %23 = bitcast [6 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %25 = bitcast i256* %24 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %23, [0 x i256]* %0, [0 x i256]* %25)
-//CHECK-NEXT:   %26 = bitcast [6 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %28 = bitcast i256* %27 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %26, [0 x i256]* %0, [0 x i256]* %28)
-//CHECK-NEXT:   %29 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   %30 = load i256, i256* %29, align 4
-//CHECK-NEXT:   %call.fr_add23 = call i256 @fr_add(i256 %30, i256 1)
-//CHECK-NEXT:   %31 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 %call.fr_add23, i256* %31, align 4
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %8)
+//CHECK-NEXT:   %9 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* %10)
+//CHECK-NEXT:   %11 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %11, [0 x i256]* %0, i256* %12)
+//CHECK-NEXT:   %13 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %14, i256 1)
+//CHECK-NEXT:   %15 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   %16 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %16, align 4
+//CHECK-NEXT:   %17 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %17, [0 x i256]* %0, i256* %18)
+//CHECK-NEXT:   %19 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %19, [0 x i256]* %0, i256* %20)
+//CHECK-NEXT:   %21 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %21, [0 x i256]* %0, i256* %22)
+//CHECK-NEXT:   %23 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %24 = load i256, i256* %23, align 4
+//CHECK-NEXT:   %call.fr_add23 = call i256 @fr_add(i256 %24, i256 1)
+//CHECK-NEXT:   %25 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add23, i256* %25, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %32 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %33 = load i256, i256* %32, align 4
-//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:   store i256 %33, i256* %34, align 4
+//CHECK-NEXT:   %26 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %27 = load i256, i256* %26, align 4
+//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %27, i256* %28, align 4
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/inner_loops2.circom b/circom/tests/loops/inner_loops2.circom
index 07e98ca23..98f20ad74 100644
--- a/circom/tests/loops/inner_loops2.circom
+++ b/circom/tests/loops/inner_loops2.circom
@@ -56,12 +56,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 // 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -80,12 +80,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 // 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_3]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -104,12 +104,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_4]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -128,12 +128,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_5]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -170,104 +170,90 @@ component main = InnerLoops(5);
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %12 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %14 = bitcast i256* %13 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, [0 x i256]* %14)
-//CHECK-NEXT:   %15 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %17 = bitcast i256* %16 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0, [0 x i256]* %17)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   %14 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, i256* %15)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %18 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 2, i256* %18, align 4
+//CHECK-NEXT:   %16 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 2, i256* %16, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %19 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
-//CHECK-NEXT:   store i256 0, i256* %19, align 4
+//CHECK-NEXT:   %17 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %17, align 4
 //CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %18 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %18, [0 x i256]* %0, i256* %19)
 //CHECK-NEXT:   %20 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %20, [0 x i256]* %0, [0 x i256]* %22)
-//CHECK-NEXT:   %23 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %25 = bitcast i256* %24 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %23, [0 x i256]* %0, [0 x i256]* %25)
-//CHECK-NEXT:   %26 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %28 = bitcast i256* %27 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %26, [0 x i256]* %0, [0 x i256]* %28)
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %20, [0 x i256]* %0, i256* %21)
+//CHECK-NEXT:   %22 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %22, [0 x i256]* %0, i256* %23)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %29 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 3, i256* %29, align 4
+//CHECK-NEXT:   %24 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 3, i256* %24, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %30 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
-//CHECK-NEXT:   store i256 0, i256* %30, align 4
+//CHECK-NEXT:   %25 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %25, align 4
 //CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
-//CHECK-NEXT:   %31 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %33 = bitcast i256* %32 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %31, [0 x i256]* %0, [0 x i256]* %33)
-//CHECK-NEXT:   %34 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %36 = bitcast i256* %35 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %34, [0 x i256]* %0, [0 x i256]* %36)
-//CHECK-NEXT:   %37 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %39 = bitcast i256* %38 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %37, [0 x i256]* %0, [0 x i256]* %39)
-//CHECK-NEXT:   %40 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %42 = bitcast i256* %41 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %40, [0 x i256]* %0, [0 x i256]* %42)
+//CHECK-NEXT:   %26 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %26, [0 x i256]* %0, i256* %27)
+//CHECK-NEXT:   %28 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %28, [0 x i256]* %0, i256* %29)
+//CHECK-NEXT:   %30 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %30, [0 x i256]* %0, i256* %31)
+//CHECK-NEXT:   %32 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %32, [0 x i256]* %0, i256* %33)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %43 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 4, i256* %43, align 4
+//CHECK-NEXT:   %34 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 4, i256* %34, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %44 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
-//CHECK-NEXT:   store i256 0, i256* %44, align 4
+//CHECK-NEXT:   %35 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %35, align 4
 //CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
-//CHECK-NEXT:   %45 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
-//CHECK-NEXT:   %47 = bitcast i256* %46 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %45, [0 x i256]* %0, [0 x i256]* %47)
-//CHECK-NEXT:   %48 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %49 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %50 = bitcast i256* %49 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %48, [0 x i256]* %0, [0 x i256]* %50)
-//CHECK-NEXT:   %51 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %52 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %53 = bitcast i256* %52 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %51, [0 x i256]* %0, [0 x i256]* %53)
-//CHECK-NEXT:   %54 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %56 = bitcast i256* %55 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %54, [0 x i256]* %0, [0 x i256]* %56)
-//CHECK-NEXT:   %57 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %58 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %59 = bitcast i256* %58 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %57, [0 x i256]* %0, [0 x i256]* %59)
+//CHECK-NEXT:   %36 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %36, [0 x i256]* %0, i256* %37)
+//CHECK-NEXT:   %38 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %38, [0 x i256]* %0, i256* %39)
+//CHECK-NEXT:   %40 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %40, [0 x i256]* %0, i256* %41)
+//CHECK-NEXT:   %42 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %42, [0 x i256]* %0, i256* %43)
+//CHECK-NEXT:   %44 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %45 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %44, [0 x i256]* %0, i256* %45)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %60 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 5, i256* %60, align 4
+//CHECK-NEXT:   %46 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 5, i256* %46, align 4
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/inner_loops3.circom b/circom/tests/loops/inner_loops3.circom
index a3fcfcaa9..e0278271c 100644
--- a/circom/tests/loops/inner_loops3.circom
+++ b/circom/tests/loops/inner_loops3.circom
@@ -47,12 +47,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -71,12 +71,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_3]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -95,12 +95,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_4]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -119,12 +119,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_5]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -156,84 +156,71 @@ component main = InnerLoops(5);
 //CHECK-NEXT:  unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %12 = bitcast i256* %11 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, [0 x i256]* %12)
-//CHECK-NEXT:   %13 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %15 = bitcast i256* %14 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %13, [0 x i256]* %0, [0 x i256]* %15)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %16 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 0, i256* %16, align 4
+//CHECK-NEXT:   %14 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %14, align 4
 //CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %15 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %15, [0 x i256]* %0, i256* %16)
 //CHECK-NEXT:   %17 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %19 = bitcast i256* %18 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %17, [0 x i256]* %0, [0 x i256]* %19)
-//CHECK-NEXT:   %20 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %20, [0 x i256]* %0, [0 x i256]* %22)
-//CHECK-NEXT:   %23 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %25 = bitcast i256* %24 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %23, [0 x i256]* %0, [0 x i256]* %25)
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %17, [0 x i256]* %0, i256* %18)
+//CHECK-NEXT:   %19 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %19, [0 x i256]* %0, i256* %20)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %26 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 0, i256* %26, align 4
+//CHECK-NEXT:   %21 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %21, align 4
 //CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
-//CHECK-NEXT:   %27 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %29 = bitcast i256* %28 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %27, [0 x i256]* %0, [0 x i256]* %29)
-//CHECK-NEXT:   %30 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %32 = bitcast i256* %31 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %30, [0 x i256]* %0, [0 x i256]* %32)
-//CHECK-NEXT:   %33 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %35 = bitcast i256* %34 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %33, [0 x i256]* %0, [0 x i256]* %35)
-//CHECK-NEXT:   %36 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %38 = bitcast i256* %37 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %36, [0 x i256]* %0, [0 x i256]* %38)
+//CHECK-NEXT:   %22 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %22, [0 x i256]* %0, i256* %23)
+//CHECK-NEXT:   %24 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %24, [0 x i256]* %0, i256* %25)
+//CHECK-NEXT:   %26 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %26, [0 x i256]* %0, i256* %27)
+//CHECK-NEXT:   %28 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %28, [0 x i256]* %0, i256* %29)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %39 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
-//CHECK-NEXT:   store i256 0, i256* %39, align 4
+//CHECK-NEXT:   %30 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %30, align 4
 //CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
-//CHECK-NEXT:   %40 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
-//CHECK-NEXT:   %42 = bitcast i256* %41 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %40, [0 x i256]* %0, [0 x i256]* %42)
-//CHECK-NEXT:   %43 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %45 = bitcast i256* %44 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %43, [0 x i256]* %0, [0 x i256]* %45)
-//CHECK-NEXT:   %46 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %47 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %48 = bitcast i256* %47 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %46, [0 x i256]* %0, [0 x i256]* %48)
-//CHECK-NEXT:   %49 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %50 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %51 = bitcast i256* %50 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %49, [0 x i256]* %0, [0 x i256]* %51)
-//CHECK-NEXT:   %52 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %53 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %54 = bitcast i256* %53 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %52, [0 x i256]* %0, [0 x i256]* %54)
+//CHECK-NEXT:   %31 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %31, [0 x i256]* %0, i256* %32)
+//CHECK-NEXT:   %33 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %33, [0 x i256]* %0, i256* %34)
+//CHECK-NEXT:   %35 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %35, [0 x i256]* %0, i256* %36)
+//CHECK-NEXT:   %37 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %37, [0 x i256]* %0, i256* %38)
+//CHECK-NEXT:   %39 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//DELETE:   %40 = bitcast i256* %40 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %39, [0 x i256]* %0, i256* %40)
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/inner_loops6.circom b/circom/tests/loops/inner_loops6.circom
index f44f6aa7e..733beea05 100644
--- a/circom/tests/loops/inner_loops6.circom
+++ b/circom/tests/loops/inner_loops6.circom
@@ -35,14 +35,14 @@ component main = Num2Bits(2);
 //	out[3] = in;
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 4
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -59,14 +59,14 @@ component main = Num2Bits(2);
 //CHECK-NEXT: }
 // 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 4
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -88,32 +88,28 @@ component main = Num2Bits(2);
 //CHECK-NEXT:   store i256 0, i256* %3, align 4
 //CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %6 = bitcast i256* %5 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, [0 x i256]* %6)
-//CHECK-NEXT:   %7 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %9 = bitcast i256* %8 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, [0 x i256]* %9)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
 //CHECK-NEXT:   %10 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
-//CHECK-NEXT:   %12 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
-//CHECK-NEXT:   %13 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 0, i256* %13, align 4
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   %11 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %11, align 4
+//CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
 //CHECK-NEXT:   %14 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16)
-//CHECK-NEXT:   %17 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %19 = bitcast i256* %18 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %17, [0 x i256]* %0, [0 x i256]* %19)
-//CHECK-NEXT:   %20 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %21 = load i256, i256* %20, align 4
-//CHECK-NEXT:   %call.fr_add15 = call i256 @fr_add(i256 %21, i256 1)
-//CHECK-NEXT:   %22 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add15, i256* %22, align 4
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, i256* %15)
+//CHECK-NEXT:   %16 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %17 = load i256, i256* %16, align 4
+//CHECK-NEXT:   %call.fr_add15 = call i256 @fr_add(i256 %17, i256 1)
+//CHECK-NEXT:   %18 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add15, i256* %18, align 4
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/simple_variant_idx.circom b/circom/tests/loops/simple_variant_idx.circom
index d272890ec..193c060cf 100644
--- a/circom/tests/loops/simple_variant_idx.circom
+++ b/circom/tests/loops/simple_variant_idx.circom
@@ -26,16 +26,16 @@ component main = SimpleVariantIdx(3);
 // NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1){{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0, i256* %fixed_1){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY:
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %3 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
 //CHECK-NEXT:   %4 = load i256, i256* %3, align 4
 //CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
 //CHECK-NEXT:   store i256 %4, i256* %5, align 4
@@ -58,20 +58,14 @@ component main = SimpleVariantIdx(3);
 //CHECK:      unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %6 = bitcast i256* %5 to [0 x i256]*
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %4, [0 x i256]* %0, [0 x i256]* %6, [0 x i256]* %8)
-//CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %11 = bitcast i256* %10 to [0 x i256]*
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %13 = bitcast i256* %12 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %11, [0 x i256]* %13)
-//CHECK-NEXT:   %14 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
-//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %18 = bitcast i256* %17 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %4, [0 x i256]* %0, i256* %5, i256* %6)
+//CHECK-NEXT:   %7 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12)
 //CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/vanguard-uc-comp.circom b/circom/tests/loops/vanguard-uc-comp.circom
index dfdfdb517..02891ff8c 100644
--- a/circom/tests/loops/vanguard-uc-comp.circom
+++ b/circom/tests/loops/vanguard-uc-comp.circom
@@ -27,7 +27,7 @@ component main = Num2Bits(2);
 // NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0, [0 x i256]* %fixed_1, [0 x i256]* %fixed_2, [0 x i256]* %fixed_3){{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0, i256* %fixed_1, i256* %fixed_2, i256* %fixed_3){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
@@ -38,14 +38,14 @@ component main = Num2Bits(2);
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
 //CHECK-NEXT:   %call.fr_bit_and = call i256 @fr_bit_and(i256 %call.fr_shr, i256 1)
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
 //CHECK-NEXT:   store i256 %call.fr_bit_and, i256* %4, align 4
 //CHECK-NEXT:   br label %assert2
 //CHECK-EMPTY: 
 //CHECK-NEXT: assert2:
-//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
 //CHECK-NEXT:   %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
 //CHECK-NEXT:   %8 = load i256, i256* %7, align 4
 //CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 1)
 //CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %6, i256 %call.fr_sub)
@@ -58,7 +58,7 @@ component main = Num2Bits(2);
 //CHECK-NEXT: store3:
 //CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
 //CHECK-NEXT:   %10 = load i256, i256* %9, align 4
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %fixed_{{.*}}, i32 0, i32 0
+//CHECK-NEXT:   %11 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
 //CHECK-NEXT:   %12 = load i256, i256* %11, align 4
 //CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
 //CHECK-NEXT:   %14 = load i256, i256* %13, align 4
@@ -95,32 +95,24 @@ component main = Num2Bits(2);
 //CHECK:      unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %7 = bitcast i256* %6 to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %9 = bitcast i256* %8 to [0 x i256]*
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %11 = bitcast i256* %10 to [0 x i256]*
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %13 = bitcast i256* %12 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, [0 x i256]* %7, [0 x i256]* %9, [0 x i256]* %11, [0 x i256]* %13)
-//CHECK-NEXT:   %14 = bitcast [4 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %16 = bitcast i256* %15 to [0 x i256]*
-//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %18 = bitcast i256* %17 to [0 x i256]*
-//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %20 = bitcast i256* %19 to [0 x i256]*
-//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %22 = bitcast i256* %21 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %14, [0 x i256]* %0, [0 x i256]* %16, [0 x i256]* %18, [0 x i256]* %20, [0 x i256]* %22)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %6, i256* %7, i256* %8, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12, i256* %13, i256* %14)
 //CHECK-NEXT:   br label %assert{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: assert{{[0-9]+}}:
-//CHECK-NEXT:   %23 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %24 = load i256, i256* %23, align 4
-//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
-//CHECK-NEXT:   %26 = load i256, i256* %25, align 4
-//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %24, i256 %26)
+//CHECK-NEXT:   %15 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %16 = load i256, i256* %15, align 4
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %16, i256 %18)
 //CHECK-NEXT:   call void @__assert(i1 %call.fr_eq)
 //CHECK-NEXT:   %constraint = alloca i1, align 1
 //CHECK-NEXT:   call void @__constraint_value(i1 %call.fr_eq, i1* %constraint)
diff --git a/circom/tests/loops/variant_idx_in_loop_A.circom b/circom/tests/loops/variant_idx_in_loop_A.circom
index f196bf25b..76253588f 100644
--- a/circom/tests/loops/variant_idx_in_loop_A.circom
+++ b/circom/tests/loops/variant_idx_in_loop_A.circom
@@ -18,14 +18,14 @@ component main = VariantIndex(2);
 // %subcmps = []
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 2
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   store i256 %call.fr_shr, i256* %4, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -46,10 +46,8 @@ component main = VariantIndex(2);
 //CHECK:      unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %3 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %5 = bitcast i256* %4 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %3, [0 x i256]* %0, [0 x i256]* %5)
-//CHECK-NEXT:   %6 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %6, [0 x i256]* %0, [0 x i256]* %8)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %3, [0 x i256]* %0, i256* %4)
+//CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %6)
 //CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/variant_idx_in_loop_B.circom b/circom/tests/loops/variant_idx_in_loop_B.circom
index e6020c39a..35750e5ba 100644
--- a/circom/tests/loops/variant_idx_in_loop_B.circom
+++ b/circom/tests/loops/variant_idx_in_loop_B.circom
@@ -20,14 +20,14 @@ component main = VariantIndex(2);
 // %subcmps = []
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, [0 x i256]* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %fixed_0, i32 0, i32 0
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fixed_0, i32 0
 //CHECK-NEXT:   store i256 %call.fr_shr, i256* %4, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -49,23 +49,21 @@ component main = VariantIndex(2);
 //CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %6, i32 0, i256 1
-//CHECK-NEXT:   %8 = bitcast i256* %7 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, [0 x i256]* %8)
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %9 = bitcast [4 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %10 = bitcast [4 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 2
-//CHECK-NEXT:   %12 = bitcast i256* %11 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %9, [0 x i256]* %0, [0 x i256]* %12)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %8, [0 x i256]* %0, i256* %10)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %13 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %11 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %13 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
 //CHECK-NEXT:   %14 = load i256, i256* %13, align 4
-//CHECK-NEXT:   %15 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %16 = load i256, i256* %15, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %14, i256 %16)
-//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %17, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %12, i256 %14)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index 759dfce34..9a8bb2bab 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -88,6 +88,11 @@ impl<'a> ExtractedFuncEnvData<'a> {
         ExtractedFuncEnvData { base: Box::new(self.base.set_signal(idx, value)) }
     }
 
+    pub fn set_all_to_unk(self) -> Self {
+        todo!();
+        ExtractedFuncEnvData { base: Box::new(self.base.set_all_to_unk()) }
+    }
+
     pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
         todo!();
         ExtractedFuncEnvData { base: Box::new(self.base.set_subcmp_to_unk(subcmp_idx)) }
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
index 4ac255b53..a623cc49d 100644
--- a/circuit_passes/src/bucket_interpreter/env/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -194,6 +194,14 @@ impl<'a> Env<'a> {
         }
     }
 
+    pub fn set_all_to_unk(self) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_all_to_unk()),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_all_to_unk()),
+            Env::ExtractedFunction(d) => Env::ExtractedFunction(d.set_all_to_unk()),
+        }
+    }
+
     /// Sets all the signals of the subcmp to UNK
     pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
         match self {
diff --git a/circuit_passes/src/bucket_interpreter/env/standard_env.rs b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
index d79589234..b863896d5 100644
--- a/circuit_passes/src/bucket_interpreter/env/standard_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
@@ -91,6 +91,20 @@ impl<'a> StandardEnvData<'a> {
         copy
     }
 
+    pub fn set_all_to_unk(self) -> Self {
+        let mut copy = self;
+        for (_, v) in copy.vars.iter_mut() {
+            *v = Value::Unknown;
+        }
+        for (_, v) in copy.signals.iter_mut() {
+            *v = Value::Unknown;
+        }
+        for (_, v) in copy.subcmps.iter_mut() {
+            v.signals.clear();
+        }
+        copy
+    }
+
     /// Sets all the signals of the subcmp to UNK
     pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
         let mut copy = self;
diff --git a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
index f530b9bc6..08f3f112e 100644
--- a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
@@ -95,6 +95,13 @@ impl<'a> UnrolledBlockEnvData<'a> {
         }
     }
 
+    pub fn set_all_to_unk(self) -> Self {
+        UnrolledBlockEnvData {
+            base: Box::new(self.base.set_all_to_unk()),
+            extractor: self.extractor,
+        }
+    }
+
     pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
         UnrolledBlockEnvData {
             base: Box::new(self.base.set_subcmp_to_unk(subcmp_idx)),
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index db1b17b0a..906517527 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -451,14 +451,17 @@ impl<'a> BucketInterpreter<'a> {
         observe: bool,
     ) -> R<'env> {
         let mut env = env;
-        // let res = (Some(Unknown), env);
         let res = if bucket.symbol.eq(FR_IDENTITY_ARR_PTR) || bucket.symbol.eq(FR_INDEX_ARR_PTR) {
             (Some(Unknown), env)
         } else if bucket.symbol.starts_with(LOOP_BODY_FN_PREFIX) {
             // The extracted loop body functions can change any values in the environment
             //  via the parameters passed to it. So interpret the function and keep the
             //  resulting Env (as if the function had executed inline).
-            self.run_function_loopbody(&bucket.symbol, env, observe)
+            // self.run_function_loopbody(&bucket.symbol, env, observe)
+            //
+            //TODO: TEMP: old approach -- do not interpret the function; conservatively set everything in the Env to Unknown
+            env = env.set_all_to_unk();
+            (Some(Unknown), env)
         } else {
             let mut args = vec![];
             for i in &bucket.arguments {
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index a093cd7af..46640efd4 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -83,7 +83,8 @@ impl LoopBodyExtractor {
             Param { name: String::from("signals"), length: vec![0] },
         ];
         for i in 0..bucket_arg_order.len() {
-            params.push(Param { name: format!("fixed_{}", i), length: vec![0] });
+            // Use empty vector for the length to denote scalar (non-array) arguments
+            params.push(Param { name: format!("fixed_{}", i), length: vec![] });
         }
 
         // Copy loop body and add a "return void" at the end
@@ -196,7 +197,8 @@ impl LoopBodyExtractor {
             .0
     }
 
-    //return value key is iteration number
+    // Key for the returned HashMap is the iteration number.
+    // The returned BTreeMap maps each bucket ID to its fixed* argument index.
     fn compute_extra_args(
         recorder: &EnvRecorder,
     ) -> (HashMap<usize, Vec<(AddressType, Value)>>, BTreeMap<BucketId, usize>) {
diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index f98b6249f..ed4297660 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -305,6 +305,7 @@ fn identity_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
 
 fn index_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let bigint_ty = bigint_type(producer);
+    let ret_ty = bigint_ty.ptr_type(Default::default());
     let val_ty = bigint_ty.array_type(0).ptr_type(Default::default());
     let func = create_function(
         producer,
@@ -312,7 +313,7 @@ fn index_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
         0,
         "",
         FR_INDEX_ARR_PTR,
-        val_ty.fn_type(&[val_ty.into(), bigint_ty.into()], false),
+        ret_ty.fn_type(&[val_ty.into(), bigint_ty.into()], false),
     );
     add_inline_attribute(producer, func);
 
@@ -325,8 +326,7 @@ fn index_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     producer.set_current_bb(main);
     let gep =
         create_gep(producer, arr.into_pointer_value(), &[zero(producer), idx.into_int_value()]);
-    let cast = producer.llvm().builder.build_bitcast(gep.into_pointer_value(), val_ty, "");
-    create_return(producer, cast.into_pointer_value());
+    create_return(producer, gep.into_pointer_value());
 }
 
 pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
diff --git a/compiler/src/circuit_design/circuit.rs b/compiler/src/circuit_design/circuit.rs
index f80ba5b0d..d3ed67dc3 100644
--- a/compiler/src/circuit_design/circuit.rs
+++ b/compiler/src/circuit_design/circuit.rs
@@ -86,10 +86,13 @@ impl WriteLLVMIR for Circuit {
                     // This section is a little more complicated than desired because IntType and ArrayType do
                     //  not have a common Trait that defines the `array_type` and `ptr_type` member functions.
                     let ty = match &p.length.len() {
+                        // [] -> i256*
                         0 => bigint_type(producer).ptr_type(Default::default()),
+                        // [A] -> [A x i256]*
                         1 => bigint_type(producer)
                             .array_type(p.length[0] as u32)
                             .ptr_type(Default::default()),
+                        // [A,B,C,...] -> [C x [B x [A x i256]*]*]*
                         _ => {
                             let mut temp = bigint_type(producer).array_type(p.length[0] as u32);
                             for size in &p.length[1..] {
diff --git a/compiler/src/intermediate_representation/load_bucket.rs b/compiler/src/intermediate_representation/load_bucket.rs
index dac5f25f0..d1b3b2ee2 100644
--- a/compiler/src/intermediate_representation/load_bucket.rs
+++ b/compiler/src/intermediate_representation/load_bucket.rs
@@ -106,7 +106,12 @@ impl WriteLLVMIR for LoadBucket {
                     AddressType::SubcmpSignal { cmp_address, ..  } => {
                         let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
                         let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        create_gep(producer, subcmp, &[zero(producer), index])
+                        if subcmp.get_type().get_element_type().is_array_type() {
+                            create_gep(producer, subcmp, &[zero(producer), index])
+                        } else {
+                            assert_eq!(zero(producer), index);
+                            create_gep(producer, subcmp, &[index])
+                        }
                     }
                 }.into_pointer_value();
                 create_load(producer, gep)
diff --git a/compiler/src/intermediate_representation/store_bucket.rs b/compiler/src/intermediate_representation/store_bucket.rs
index 7d72dfed2..371a7124b 100644
--- a/compiler/src/intermediate_representation/store_bucket.rs
+++ b/compiler/src/intermediate_representation/store_bucket.rs
@@ -125,7 +125,12 @@ impl StoreBucket{
                     AddressType::SubcmpSignal { cmp_address, .. } => {
                         let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
                         let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        create_gep(producer, subcmp, &[zero(producer), dest_index])
+                        if subcmp.get_type().get_element_type().is_array_type() {
+                            create_gep(producer, subcmp, &[zero(producer), dest_index])
+                        } else {
+                            assert_eq!(zero(producer), dest_index);
+                            create_gep(producer, subcmp, &[dest_index])
+                        }
                     }
                 }.into_pointer_value();
                 if context.size > 1 {

From 7bfdff95290c4794a7fe1e6a055b362d045a0ef7 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 20 Sep 2023 13:04:37 -0500
Subject: [PATCH 24/42] add 'counter_override' for extracted loop body function
 to access subcmp counter

also a few minor Rust warning fixes and code cleanup
---
 .../deterministic_subcomponent_invocation.rs  |  2 +
 .../loop_unroll/extracted_location_updater.rs |  2 +
 circuit_passes/src/passes/mod.rs              |  2 +
 .../src/llvm_elements/instructions.rs         |  2 +-
 code_producers/src/llvm_elements/values.rs    |  3 +-
 .../address_type.rs                           | 30 +++++++----
 .../ir_interface.rs                           |  2 +-
 .../load_bucket.rs                            | 51 +++++++++++--------
 .../store_bucket.rs                           |  8 +--
 .../intermediate_representation/translate.rs  |  9 ++--
 compiler/src/ir_processing/reduce_stack.rs    |  4 +-
 11 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
index a834f7535..489719838 100644
--- a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
+++ b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
@@ -149,6 +149,7 @@ impl CircuitTransformationPass for DeterministicSubCmpInvokePass {
                 uniform_parallel_value,
                 is_output,
                 input_information,
+                counter_override,
             } => AddressType::SubcmpSignal {
                 cmp_address: self.transform_instruction(&cmp_address),
                 uniform_parallel_value: uniform_parallel_value.clone(),
@@ -158,6 +159,7 @@ impl CircuitTransformationPass for DeterministicSubCmpInvokePass {
                 } else {
                     input_information.clone()
                 },
+                counter_override: *counter_override,
             },
             x => x.clone(),
         }
diff --git a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
index c21c4d36c..563d996c7 100644
--- a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
+++ b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
@@ -19,6 +19,7 @@ impl ExtractedFunctionLocationUpdater {
             bucket.address_type = AddressType::SubcmpSignal {
                 cmp_address: new_u32_value(bucket, x),
                 uniform_parallel_value: None,
+                counter_override: false,
                 is_output: false,
                 input_information: InputInformation::NoInput,
             };
@@ -45,6 +46,7 @@ impl ExtractedFunctionLocationUpdater {
             bucket.dest_address_type = AddressType::SubcmpSignal {
                 cmp_address: new_u32_value(bucket, x),
                 uniform_parallel_value: None,
+                counter_override: false,
                 is_output: false,
                 input_information: InputInformation::NoInput,
             };
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index d28b3a8cf..a00aebf02 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -154,11 +154,13 @@ pub trait CircuitTransformationPass {
                 uniform_parallel_value,
                 is_output,
                 input_information,
+                counter_override,
             } => AddressType::SubcmpSignal {
                 cmp_address: self.transform_instruction(cmp_address),
                 uniform_parallel_value: uniform_parallel_value.clone(),
                 is_output: *is_output,
                 input_information: input_information.clone(),
+                counter_override: *counter_override,
             },
             x => x.clone(),
         }
diff --git a/code_producers/src/llvm_elements/instructions.rs b/code_producers/src/llvm_elements/instructions.rs
index 9e08d7292..e5290c689 100644
--- a/code_producers/src/llvm_elements/instructions.rs
+++ b/code_producers/src/llvm_elements/instructions.rs
@@ -5,7 +5,7 @@ use inkwell::values::{
     AnyValue, AnyValueEnum, BasicMetadataValueEnum, BasicValue, BasicValueEnum, FunctionValue,
     InstructionOpcode, InstructionValue, IntMathValue, IntValue, PhiValue, PointerValue,
 };
-use crate::llvm_elements::{LLVMIRProducer};
+use crate::llvm_elements::LLVMIRProducer;
 use crate::llvm_elements::fr::{FR_MUL_FN_NAME, FR_LT_FN_NAME};
 use crate::llvm_elements::functions::create_bb;
 use crate::llvm_elements::types::{bigint_type, i32_type};
diff --git a/code_producers/src/llvm_elements/values.rs b/code_producers/src/llvm_elements/values.rs
index e1549acbc..987b4125b 100644
--- a/code_producers/src/llvm_elements/values.rs
+++ b/code_producers/src/llvm_elements/values.rs
@@ -1,7 +1,6 @@
 use inkwell::types::StringRadix;
 use inkwell::values::{AnyValue, AnyValueEnum, IntValue};
-
-use crate::llvm_elements::{LLVMIRProducer};
+use crate::llvm_elements::LLVMIRProducer;
 use crate::llvm_elements::types::bigint_type;
 
 pub fn create_literal_u32<'a>(producer: &dyn LLVMIRProducer<'a>, val: u64) -> IntValue<'a> {
diff --git a/compiler/src/intermediate_representation/address_type.rs b/compiler/src/intermediate_representation/address_type.rs
index 1501d54c2..b31a6afa4 100644
--- a/compiler/src/intermediate_representation/address_type.rs
+++ b/compiler/src/intermediate_representation/address_type.rs
@@ -11,14 +11,20 @@ pub enum StatusInput {
 #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
 pub enum InputInformation {
     NoInput,
-    Input {status: StatusInput},
+    Input { status: StatusInput },
 }
 
 #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
 pub enum AddressType {
     Variable,
     Signal,
-    SubcmpSignal { cmp_address: InstructionPointer, uniform_parallel_value: Option<bool>, is_output: bool, input_information: InputInformation },
+    SubcmpSignal {
+        cmp_address: InstructionPointer,
+        uniform_parallel_value: Option<bool>,
+        is_output: bool,
+        input_information: InputInformation,
+        counter_override: bool,
+    },
 }
 
 impl ToString for AddressType {
@@ -27,7 +33,11 @@ impl ToString for AddressType {
         match self {
             Variable => "VARIABLE".to_string(),
             Signal => "SIGNAL".to_string(),
-            SubcmpSignal { cmp_address, .. } => format!("SUBCOMPONENT:{}", cmp_address.to_string()),
+            SubcmpSignal { cmp_address, counter_override, .. } => format!(
+                "{}:{}",
+                if *counter_override { "SUBCOMP_COUNTER" } else { "SUBCOMPONENT" },
+                cmp_address.to_string()
+            ),
         }
     }
 }
@@ -38,10 +48,12 @@ impl ToSExp for AddressType {
         match self {
             Variable => SExp::Atom("VARIABLE".to_string()),
             Signal => SExp::Atom("SIGNAL".to_string()),
-            SubcmpSignal { cmp_address, .. } => SExp::List(vec![
-                SExp::Atom("SUBCOMPONENT".to_string()),
-                cmp_address.to_sexp()
-            ])
+            SubcmpSignal { cmp_address, counter_override, .. } => SExp::List(vec![
+                SExp::Atom(
+                    if *counter_override { "SUBCOMP_COUNTER" } else { "SUBCOMPONENT" }.to_string(),
+                ),
+                cmp_address.to_sexp(),
+            ]),
         }
     }
 }
@@ -50,8 +62,8 @@ impl UpdateId for AddressType {
     fn update_id(&mut self) {
         use AddressType::*;
         match self {
-            SubcmpSignal { cmp_address, ..} => cmp_address.update_id(),
+            SubcmpSignal { cmp_address, .. } => cmp_address.update_id(),
             _ => {}
         }
     }
-}
\ No newline at end of file
+}
diff --git a/compiler/src/intermediate_representation/ir_interface.rs b/compiler/src/intermediate_representation/ir_interface.rs
index e5eb88cde..dddeec0fa 100644
--- a/compiler/src/intermediate_representation/ir_interface.rs
+++ b/compiler/src/intermediate_representation/ir_interface.rs
@@ -13,7 +13,7 @@ pub use super::store_bucket::StoreBucket;
 pub use super::log_bucket::LogBucketArg;
 pub use super::types::{InstrContext, ValueType};
 pub use super::value_bucket::ValueBucket;
-pub use super::constraint_bucket::{ConstraintBucket};
+pub use super::constraint_bucket::ConstraintBucket;
 pub use super::block_bucket::BlockBucket;
 pub use super::nop_bucket::NopBucket;
 
diff --git a/compiler/src/intermediate_representation/load_bucket.rs b/compiler/src/intermediate_representation/load_bucket.rs
index d1b3b2ee2..1da8acc6d 100644
--- a/compiler/src/intermediate_representation/load_bucket.rs
+++ b/compiler/src/intermediate_representation/load_bucket.rs
@@ -87,33 +87,44 @@ impl WriteLLVMIR for LoadBucket {
         // If we have bounds for an unknown index, we will get the base address and let the function check the bounds
         let load = match &self.bounded_fn {
             Some(name) => {
-                let arr_ptr = match &self.address_type {
-                    AddressType::Variable => producer.body_ctx().get_variable_array(producer),
-                    AddressType::Signal => producer.template_ctx().get_signal_array(producer),
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
-                        let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-                        let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        create_gep(producer, subcmp, &[zero(producer)])
-                    },
-                }.into_pointer_value();
-                let arr_ptr = pointer_cast(producer, arr_ptr, array_ptr_ty(producer));
-                create_call(producer, name.as_str(), &[arr_ptr.into(), index.into()])
+                let get_ptr = || {
+                    let arr_ptr = match &self.address_type {
+                        AddressType::Variable => producer.body_ctx().get_variable_array(producer),
+                        AddressType::Signal => producer.template_ctx().get_signal_array(producer),
+                        AddressType::SubcmpSignal { cmp_address, counter_override, .. } => {
+                            let addr = cmp_address.produce_llvm_ir(producer)
+                                .expect("The address of a subcomponent must yield a value!");
+                            if *counter_override {
+                                return producer.template_ctx().load_subcmp_counter(producer, addr)
+                            } else {
+                                let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
+                                create_gep(producer, subcmp, &[zero(producer)])
+                            }
+                        }
+                    };
+                    pointer_cast(producer, arr_ptr.into_pointer_value(), array_ptr_ty(producer))
+                };
+                create_call(producer, name.as_str(), &[get_ptr().into(), index.into()])
             },
             None => {
                 let gep = match &self.address_type {
-                    AddressType::Variable => producer.body_ctx().get_variable(producer, index),
-                    AddressType::Signal => producer.template_ctx().get_signal(producer, index),
-                    AddressType::SubcmpSignal { cmp_address, ..  } => {
+                    AddressType::Variable => producer.body_ctx().get_variable(producer, index).into_pointer_value(),
+                    AddressType::Signal => producer.template_ctx().get_signal(producer, index).into_pointer_value(),
+                    AddressType::SubcmpSignal { cmp_address, counter_override, ..  } => {
                         let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-                        let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        if subcmp.get_type().get_element_type().is_array_type() {
-                            create_gep(producer, subcmp, &[zero(producer), index])
+                        if *counter_override {
+                            producer.template_ctx().load_subcmp_counter(producer, addr)
                         } else {
-                            assert_eq!(zero(producer), index);
-                            create_gep(producer, subcmp, &[index])
+                            let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
+                            if subcmp.get_type().get_element_type().is_array_type() {
+                                create_gep(producer, subcmp, &[zero(producer), index]).into_pointer_value()
+                            } else {
+                                assert_eq!(zero(producer), index);
+                                create_gep(producer, subcmp, &[index]).into_pointer_value()
+                            }
                         }
                     }
-                }.into_pointer_value();
+                };
                 create_load(producer, gep)
             },
         };
diff --git a/compiler/src/intermediate_representation/store_bucket.rs b/compiler/src/intermediate_representation/store_bucket.rs
index 371a7124b..5265b4a82 100644
--- a/compiler/src/intermediate_representation/store_bucket.rs
+++ b/compiler/src/intermediate_representation/store_bucket.rs
@@ -185,14 +185,14 @@ impl StoreBucket{
             }
         }
 
-        let sub_cmp_name = match &dest {
-            LocationRule::Indexed { template_header, .. } => template_header.clone(),
-            LocationRule::Mapped { .. } => None
-        };
         // If the input information is unknown add a check that checks the counter and if its zero call the subcomponent
         // If its last just call run directly
         if let AddressType::SubcmpSignal { input_information, cmp_address, .. } = &dest_address_type {
             if let InputInformation::Input { status } = input_information {
+                let sub_cmp_name = match &dest {
+                    LocationRule::Indexed { template_header, .. } => template_header.clone(),
+                    LocationRule::Mapped { .. } => None
+                };
                 match status {
                     StatusInput::Last => {
                         let run_fn = run_fn_name(sub_cmp_name.expect("Could not get the name of the subcomponent"));
diff --git a/compiler/src/intermediate_representation/translate.rs b/compiler/src/intermediate_representation/translate.rs
index 2d10881d3..7eef90661 100644
--- a/compiler/src/intermediate_representation/translate.rs
+++ b/compiler/src/intermediate_representation/translate.rs
@@ -1196,10 +1196,11 @@ impl ProcessedSymbol {
                 cmp_address: compute_full_address(state, self.symbol, self.before_signal, expr.get_meta()),
                 is_output: self.signal_type.unwrap() == SignalType::Output,
                 uniform_parallel_value: state.component_to_parallel.get(&self.name).unwrap().uniform_parallel_value,
-                input_information : match self.signal_type.unwrap() {
+                input_information: match self.signal_type.unwrap() {
                     SignalType::Input => InputInformation::Input { status: StatusInput:: Unknown},
                     _ => InputInformation::NoInput,
                 },
+                counter_override: false,
             };
             FinalData {
                 context: InstrContext { size: self.length },
@@ -1240,10 +1241,11 @@ impl ProcessedSymbol {
                 cmp_address: compute_full_address(state, self.symbol, self.before_signal, stmt.get_meta()),
                 uniform_parallel_value: state.component_to_parallel.get(&self.name).unwrap().uniform_parallel_value,
                 is_output: self.signal_type.unwrap() == SignalType::Output,
-                input_information : match self.signal_type.unwrap() {
+                input_information: match self.signal_type.unwrap() {
                     SignalType::Input => InputInformation::Input { status:StatusInput:: Unknown},
                     _ => InputInformation::NoInput,
                 },
+                counter_override: false,
             };
             StoreBucket {
                 id: new_id(),
@@ -1286,10 +1288,11 @@ impl ProcessedSymbol {
                 cmp_address: compute_full_address(state, self.symbol, self.before_signal, expr.get_meta()),
                 uniform_parallel_value: state.component_to_parallel.get(&self.name).unwrap().uniform_parallel_value,
                 is_output: self.signal_type.unwrap() == SignalType::Output,
-                input_information : match self.signal_type.unwrap() {
+                input_information: match self.signal_type.unwrap() {
                     SignalType::Input => InputInformation::Input { status: StatusInput:: Unknown},
                     _ => InputInformation::NoInput,
                 },
+                counter_override: false,
             };
             LoadBucket {
                 id: new_id(),
diff --git a/compiler/src/ir_processing/reduce_stack.rs b/compiler/src/ir_processing/reduce_stack.rs
index f4909322a..3ce9f9dbe 100644
--- a/compiler/src/ir_processing/reduce_stack.rs
+++ b/compiler/src/ir_processing/reduce_stack.rs
@@ -164,9 +164,9 @@ pub fn reduce_address_type(at: AddressType) -> AddressType {
     match at {
         Variable => Variable,
         Signal => Signal,
-        SubcmpSignal { cmp_address, uniform_parallel_value, is_output, input_information } => {
+        SubcmpSignal { cmp_address, uniform_parallel_value, is_output, input_information, counter_override} => {
             let cmp_address = Allocate::allocate(reduce_instruction(*cmp_address));
-            SubcmpSignal { cmp_address, uniform_parallel_value, is_output, input_information }
+            SubcmpSignal { cmp_address, uniform_parallel_value, is_output, input_information, counter_override}
         }
     }
 }

From 1b3a85b2b3f548e78a2f0507bda1e45cb2a6bb30 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 21 Sep 2023 10:31:26 -0500
Subject: [PATCH 25/42] [VAN-605] handle subcomponents (mostly)

---
 Cargo.lock                                    |  27 +-
 circom/src/compilation_user.rs                |  27 +-
 circom/tests/loops/inner_loop_simple.circom   |  56 ++-
 circom/tests/loops/inner_loops2.circom        |  56 +--
 circom/tests/loops/inner_loops3.circom        |  56 +--
 circom/tests/loops/inner_loops4.circom        | 107 ++++-
 circom/tests/loops/inner_loops5.circom        |  10 +-
 circom/tests/loops/inner_loops6.circom        |  49 +--
 circom/tests/loops/simple_variant_idx.circom  |  14 +-
 circom/tests/loops/vanguard-uc-comp.circom    |  18 +-
 .../tests/loops/variant_idx_in_loop_A.circom  |   8 +-
 .../tests/loops/variant_idx_in_loop_B.circom  |   8 +-
 circom/tests/subcmps/subcmps1.circom          | 126 +++++-
 circom/tests/subcmps/subcmps2.circom          | 218 +++++++++-
 circuit_passes/Cargo.toml                     |   1 +
 .../env/extracted_func_env.rs                 | 281 +++++++++---
 .../src/bucket_interpreter/env/mod.rs         |  23 +-
 .../bucket_interpreter/env/standard_env.rs    |  19 +-
 .../env/unrolled_block_env.rs                 |  11 +-
 .../src/bucket_interpreter/memory.rs          |  28 +-
 circuit_passes/src/bucket_interpreter/mod.rs  |  54 ++-
 .../src/bucket_interpreter/value.rs           |   2 +-
 .../src/passes/conditional_flattening.rs      |  20 +-
 .../deterministic_subcomponent_invocation.rs  |  18 +-
 .../src/passes/loop_unroll/body_extractor.rs  | 405 +++++++++++++-----
 .../loop_unroll/extracted_location_updater.rs | 203 ++++++---
 .../passes/loop_unroll/loop_env_recorder.rs   | 130 ++++--
 circuit_passes/src/passes/loop_unroll/mod.rs  |  29 +-
 .../src/passes/mapped_to_indexed.rs           |  18 +-
 circuit_passes/src/passes/mod.rs              |  86 +++-
 circuit_passes/src/passes/simplification.rs   |  23 +-
 .../src/passes/unknown_index_sanitization.rs  |  18 +-
 code_producers/src/llvm_elements/fr.rs        |  59 +--
 code_producers/src/llvm_elements/functions.rs |  34 +-
 code_producers/src/llvm_elements/mod.rs       |  10 +-
 compiler/src/circuit_design/circuit.rs        |  43 +-
 compiler/src/circuit_design/function.rs       |   2 +
 compiler/src/circuit_design/template.rs       |   1 +
 38 files changed, 1702 insertions(+), 596 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 27269ad96..54a515fe2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -220,6 +220,7 @@ dependencies = [
  "compiler",
  "const_format",
  "constraint_generation",
+ "indexmap 2.0.0",
  "intervallum",
  "program_structure",
 ]
@@ -445,6 +446,12 @@ dependencies = [
  "log",
 ]
 
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
 [[package]]
 name = "errno"
 version = "0.3.1"
@@ -596,6 +603,12 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 
+[[package]]
+name = "hashbrown"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
+
 [[package]]
 name = "hermit-abi"
 version = "0.1.19"
@@ -635,7 +648,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
  "autocfg 1.1.0",
- "hashbrown",
+ "hashbrown 0.12.3",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.14.0",
 ]
 
 [[package]]
@@ -1003,7 +1026,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4"
 dependencies = [
  "fixedbitset",
- "indexmap",
+ "indexmap 1.9.3",
 ]
 
 [[package]]
diff --git a/circom/src/compilation_user.rs b/circom/src/compilation_user.rs
index 7732d51f3..dd6e3b554 100644
--- a/circom/src/compilation_user.rs
+++ b/circom/src/compilation_user.rs
@@ -1,14 +1,12 @@
 use ansi_term::Colour;
 use circuit_passes::passes::PassManager;
-use compiler::compiler_interface;
-use compiler::compiler_interface::{Config, VCP};
+use compiler::compiler_interface::{self, Config, VCP};
 use program_structure::error_definition::Report;
 use program_structure::error_code::ReportCode;
 use program_structure::file_definition::FileLibrary;
 use program_structure::program_archive::ProgramArchive;
 use crate::VERSION;
 
-
 pub struct CompilerConfig {
     pub js_folder: String,
     pub wasm_name: String,
@@ -61,16 +59,16 @@ pub fn compile(config: CompilerConfig, program_archive: ProgramArchive, prime: &
     }
 
     if config.llvm_flag {
-        // Only run this passes if we are going to generate LLVM code
+        // Only run the passes if we are going to generate LLVM code
         let pm = PassManager::new();
         circuit = pm
-            .schedule_loop_unroll_pass(prime)
-            .schedule_conditional_flattening_pass(prime)
-            .schedule_mapped_to_indexed_pass(prime)
-            .schedule_unknown_index_sanitization_pass(prime)
-            .schedule_simplification_pass(prime)
-            .schedule_deterministic_subcmp_invoke_pass(prime)
-            .transform_circuit(circuit);
+            .schedule_loop_unroll_pass()
+            .schedule_conditional_flattening_pass()
+            .schedule_mapped_to_indexed_pass()
+            .schedule_unknown_index_sanitization_pass()
+            .schedule_simplification_pass()
+            .schedule_deterministic_subcmp_invoke_pass()
+            .transform_circuit(circuit, prime);
         compiler_interface::write_llvm_ir(
             &mut circuit,
             &program_archive,
@@ -78,11 +76,7 @@ pub fn compile(config: CompilerConfig, program_archive: ProgramArchive, prime: &
             &config.llvm_file,
             config.clean_llvm,
         )?;
-        println!(
-          "{} {}",
-            Colour::Green.paint("Written successfully:"),
-            config.llvm_file
-        );
+        println!("{} {}", Colour::Green.paint("Written successfully:"), config.llvm_file);
     }
 
     match (config.wat_flag, config.wasm_flag) {
@@ -124,7 +118,6 @@ pub fn compile(config: CompilerConfig, program_archive: ProgramArchive, prime: &
     Ok(())
 }
 
-
 fn wat_to_wasm(wat_file: &str, wasm_file: &str) -> Result<(), Report> {
     use std::fs::read_to_string;
     use std::fs::File;
diff --git a/circom/tests/loops/inner_loop_simple.circom b/circom/tests/loops/inner_loop_simple.circom
index 01f7640b2..d1e0b00c8 100644
--- a/circom/tests/loops/inner_loop_simple.circom
+++ b/circom/tests/loops/inner_loop_simple.circom
@@ -21,8 +21,8 @@ component main = InnerLoops(2, 3);
 // %lvars = { n, m, b[0], b[1], i, j }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -31,7 +31,7 @@ component main = InnerLoops(2, 3);
 //CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
 //CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
 //CHECK-NEXT:   %add_addr = add i32 %mul_addr, 2
-//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
 //CHECK-NEXT:   store i256 %3, i256* %4, align 4
@@ -50,8 +50,8 @@ component main = InnerLoops(2, 3);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -60,7 +60,7 @@ component main = InnerLoops(2, 3);
 //CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
 //CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
 //CHECK-NEXT:   %add_addr = add i32 %mul_addr, 2
-//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
 //CHECK-NEXT:   store i256 %3, i256* %4, align 4
@@ -84,41 +84,35 @@ component main = InnerLoops(2, 3);
 //CHECK-NEXT:   store i256 0, i256* %6, align 4
 //CHECK-NEXT:   %7 = bitcast [6 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %8)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %8)
 //CHECK-NEXT:   %9 = bitcast [6 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* %10)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* %10)
 //CHECK-NEXT:   %11 = bitcast [6 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %11, [0 x i256]* %0, i256* %12)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %11, [0 x i256]* %0, i256* %12)
 //CHECK-NEXT:   %13 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %14, i256 1)
-//CHECK-NEXT:   %15 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
-//CHECK-NEXT:   %16 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 0, i256* %16, align 4
+//CHECK-NEXT:   store i256 1, i256* %13, align 4
+//CHECK-NEXT:   %14 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %14, align 4
+//CHECK-NEXT:   %15 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0, i256* %16)
 //CHECK-NEXT:   %17 = bitcast [6 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %17, [0 x i256]* %0, i256* %18)
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %17, [0 x i256]* %0, i256* %18)
 //CHECK-NEXT:   %19 = bitcast [6 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %19, [0 x i256]* %0, i256* %20)
-//CHECK-NEXT:   %21 = bitcast [6 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %21, [0 x i256]* %0, i256* %22)
-//CHECK-NEXT:   %23 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   %24 = load i256, i256* %23, align 4
-//CHECK-NEXT:   %call.fr_add23 = call i256 @fr_add(i256 %24, i256 1)
-//CHECK-NEXT:   %25 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 %call.fr_add23, i256* %25, align 4
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %19, [0 x i256]* %0, i256* %20)
+//CHECK-NEXT:   %21 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %21, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %26 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %27 = load i256, i256* %26, align 4
-//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:   store i256 %27, i256* %28, align 4
+//CHECK-NEXT:   %22 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %23 = load i256, i256* %22, align 4
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %23, i256* %24, align 4
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/inner_loops2.circom b/circom/tests/loops/inner_loops2.circom
index 98f20ad74..df0775c1d 100644
--- a/circom/tests/loops/inner_loops2.circom
+++ b/circom/tests/loops/inner_loops2.circom
@@ -36,7 +36,7 @@ component main = InnerLoops(5);
 
 //CHECK-LABEL: define void @..generated..loop.body.
 //CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -56,12 +56,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 // 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -80,12 +80,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 // 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_3]]:
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -104,12 +104,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_4]]:
+//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_4]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -128,12 +128,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_5]]:
+//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_5]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -154,7 +154,7 @@ component main = InnerLoops(5);
 //CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
 //CHECK:      unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %9 = bitcast [8 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -170,10 +170,10 @@ component main = InnerLoops(5);
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %12 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
 //CHECK-NEXT:   %14 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, i256* %15)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, i256* %15)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -189,13 +189,13 @@ component main = InnerLoops(5);
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %18 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %18, [0 x i256]* %0, i256* %19)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %18, [0 x i256]* %0, i256* %19)
 //CHECK-NEXT:   %20 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %20, [0 x i256]* %0, i256* %21)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %20, [0 x i256]* %0, i256* %21)
 //CHECK-NEXT:   %22 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %22, [0 x i256]* %0, i256* %23)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %22, [0 x i256]* %0, i256* %23)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -211,16 +211,16 @@ component main = InnerLoops(5);
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %26 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %26, [0 x i256]* %0, i256* %27)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %26, [0 x i256]* %0, i256* %27)
 //CHECK-NEXT:   %28 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %28, [0 x i256]* %0, i256* %29)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %28, [0 x i256]* %0, i256* %29)
 //CHECK-NEXT:   %30 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %30, [0 x i256]* %0, i256* %31)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %30, [0 x i256]* %0, i256* %31)
 //CHECK-NEXT:   %32 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %32, [0 x i256]* %0, i256* %33)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %32, [0 x i256]* %0, i256* %33)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -236,19 +236,19 @@ component main = InnerLoops(5);
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %36 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %36, [0 x i256]* %0, i256* %37)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %36, [0 x i256]* %0, i256* %37)
 //CHECK-NEXT:   %38 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %38, [0 x i256]* %0, i256* %39)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %38, [0 x i256]* %0, i256* %39)
 //CHECK-NEXT:   %40 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %40, [0 x i256]* %0, i256* %41)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %40, [0 x i256]* %0, i256* %41)
 //CHECK-NEXT:   %42 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %42, [0 x i256]* %0, i256* %43)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %42, [0 x i256]* %0, i256* %43)
 //CHECK-NEXT:   %44 = bitcast [8 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %45 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %44, [0 x i256]* %0, i256* %45)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %44, [0 x i256]* %0, i256* %45)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
diff --git a/circom/tests/loops/inner_loops3.circom b/circom/tests/loops/inner_loops3.circom
index e0278271c..558d94961 100644
--- a/circom/tests/loops/inner_loops3.circom
+++ b/circom/tests/loops/inner_loops3.circom
@@ -27,7 +27,7 @@ component main = InnerLoops(5);
 
 //CHECK-LABEL: define void @..generated..loop.body.
 //CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -47,12 +47,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -71,12 +71,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_3]]:
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -95,12 +95,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_4]]:
+//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_4]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -119,12 +119,12 @@ component main = InnerLoops(5);
 //CHECK-NEXT: }
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_5]]:
+//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_5]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
@@ -145,7 +145,7 @@ component main = InnerLoops(5);
 //CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
 //CHECK:      unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %8 = bitcast [7 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -156,10 +156,10 @@ component main = InnerLoops(5);
 //CHECK-NEXT:  unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
 //CHECK-NEXT:   %12 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -170,13 +170,13 @@ component main = InnerLoops(5);
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %15 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %15, [0 x i256]* %0, i256* %16)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %15, [0 x i256]* %0, i256* %16)
 //CHECK-NEXT:   %17 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %17, [0 x i256]* %0, i256* %18)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %17, [0 x i256]* %0, i256* %18)
 //CHECK-NEXT:   %19 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_3]]([0 x i256]* %19, [0 x i256]* %0, i256* %20)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %19, [0 x i256]* %0, i256* %20)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -187,16 +187,16 @@ component main = InnerLoops(5);
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %22 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %22, [0 x i256]* %0, i256* %23)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %22, [0 x i256]* %0, i256* %23)
 //CHECK-NEXT:   %24 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %24, [0 x i256]* %0, i256* %25)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %24, [0 x i256]* %0, i256* %25)
 //CHECK-NEXT:   %26 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %26, [0 x i256]* %0, i256* %27)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %26, [0 x i256]* %0, i256* %27)
 //CHECK-NEXT:   %28 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_4]]([0 x i256]* %28, [0 x i256]* %0, i256* %29)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %28, [0 x i256]* %0, i256* %29)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -207,20 +207,20 @@ component main = InnerLoops(5);
 //CHECK-NEXT: unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %31 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %31, [0 x i256]* %0, i256* %32)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %31, [0 x i256]* %0, i256* %32)
 //CHECK-NEXT:   %33 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %33, [0 x i256]* %0, i256* %34)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %33, [0 x i256]* %0, i256* %34)
 //CHECK-NEXT:   %35 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %35, [0 x i256]* %0, i256* %36)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %35, [0 x i256]* %0, i256* %36)
 //CHECK-NEXT:   %37 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %37, [0 x i256]* %0, i256* %38)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %37, [0 x i256]* %0, i256* %38)
 //CHECK-NEXT:   %39 = bitcast [7 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //DELETE:   %40 = bitcast i256* %40 to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_5]]([0 x i256]* %39, [0 x i256]* %0, i256* %40)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %39, [0 x i256]* %0, i256* %40)
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/inner_loops4.circom b/circom/tests/loops/inner_loops4.circom
index e51a9d7ee..6c33e35a4 100644
--- a/circom/tests/loops/inner_loops4.circom
+++ b/circom/tests/loops/inner_loops4.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
 
 template InnerLoops(n) {
     signal input a[n];
@@ -9,9 +8,115 @@ template InnerLoops(n) {
     var j;
     for (var i = 0; i < n; i++) {
         for (j = 0; j <= i; j++) {
+            // NOTE: When processing the outer loop, the following statement is determined NOT
+            //  safe to move into a new function since it uses 'j' which is unknown. That results
+            //  in the outer loop unrolling without extracting the body to a new function. Then
+            //  the two copies of the inner loop are processed and their bodies are extracted to
+            //  new functions and replaced with calls to those functions before unrolling. So it
+            //  ends up creating two slightly different functions for this innermost body, one
+            //  for each iteration of the outer loop (i.e. when i=0 and when i=1). This result
+            //  is logically correct but not optimal in terms of code size.
             b[i] = a[i - j];
         }
     }
 }
 
 component main = InnerLoops(2);
+
+// %0 (i.e. signal arena) = { a[0], a[1] }
+// %lvars = { n, b[0], b[1], j, i }
+//
+//Fully unrolled:
+//  b[0] = a[0];
+//  b[1] = a[1];
+//  b[1] = a[0];
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %3, i256 %5)
+//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %call.fr_sub)
+//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
+//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 0
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 %add_addr3
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %7, i256* %8, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %11, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %3, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %6 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   %7 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   %8 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %8, align 4
+//CHECK-NEXT:   %9 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %9, align 4
+//CHECK-NEXT:   %10 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   %14 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %14, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops5.circom b/circom/tests/loops/inner_loops5.circom
index 843b855d4..8cee6ce78 100644
--- a/circom/tests/loops/inner_loops5.circom
+++ b/circom/tests/loops/inner_loops5.circom
@@ -21,7 +21,7 @@ component main = Num2Bits(4);
 
 //CHECK-LABEL: define void @..generated..loop.body.
 //CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
@@ -75,13 +75,13 @@ component main = Num2Bits(4);
 //CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
 //CHECK:      unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %4 = bitcast [4 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
 //CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
 //CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
 //CHECK-NEXT:   %7 = bitcast [4 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/inner_loops6.circom b/circom/tests/loops/inner_loops6.circom
index 733beea05..ee885c37f 100644
--- a/circom/tests/loops/inner_loops6.circom
+++ b/circom/tests/loops/inner_loops6.circom
@@ -15,9 +15,10 @@ template Num2Bits(n) {
             //  safe to move into a new function since it uses 'j' which is unknown. That results
             //  in the outer loop unrolling without extrating the body to a new function. Then
             //  the two copies of the inner loop are processed and their bodies are extracted to
-            //  new functions and replaced with calls to those functions before unrolling.
-            //  This result is logically correct but not optimal because the 2 extracted body
-            //  functions are identical.
+            //  new functions and replaced with calls to those functions before unrolling. So it
+            //  ends up creating two different functions for this innermost body, one for each
+            //  iteration of the outer loop (i.e. when i=0 and when i=1). In this case, those 2
+            //  functions are identical. This is logically correct but not optimal in code size.
         	out[i*n + j] <-- in;
         }
     }
@@ -35,14 +36,14 @@ component main = Num2Bits(2);
 //	out[3] = in;
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_1]]:
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 4
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -59,14 +60,14 @@ component main = Num2Bits(2);
 //CHECK-NEXT: }
 // 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID_2]]:
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 4
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -88,28 +89,22 @@ component main = Num2Bits(2);
 //CHECK-NEXT:   store i256 0, i256* %3, align 4
 //CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
 //CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
 //CHECK-NEXT:   %8 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
-//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
-//CHECK-NEXT:   %10 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
-//CHECK-NEXT:   %11 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 0, i256* %11, align 4
+//CHECK-NEXT:   store i256 1, i256* %8, align 4
+//CHECK-NEXT:   %9 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %9, align 4
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
 //CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
-//CHECK-NEXT:   %14 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, i256* %15)
-//CHECK-NEXT:   %16 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %17 = load i256, i256* %16, align 4
-//CHECK-NEXT:   %call.fr_add15 = call i256 @fr_add(i256 %17, i256 1)
-//CHECK-NEXT:   %18 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %call.fr_add15, i256* %18, align 4
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   %14 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 2, i256* %14, align 4
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/loops/simple_variant_idx.circom b/circom/tests/loops/simple_variant_idx.circom
index 193c060cf..b72bbc2fe 100644
--- a/circom/tests/loops/simple_variant_idx.circom
+++ b/circom/tests/loops/simple_variant_idx.circom
@@ -23,19 +23,17 @@ component main = SimpleVariantIdx(3);
 // %lvars =  [ n, lc, i ]
 // %subcmps = []
 //
-// NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
-//
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0, i256* %fixed_1){{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY:
 //CHECK-NEXT: store{{[0-9]+}}:
-//CHECK-NEXT:   %3 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
+//CHECK-NEXT:   %3 = getelementptr i256, i256* %fix_[[X2]], i32 0
 //CHECK-NEXT:   %4 = load i256, i256* %3, align 4
 //CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
 //CHECK-NEXT:   store i256 %4, i256* %5, align 4
@@ -59,13 +57,13 @@ component main = SimpleVariantIdx(3);
 //CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %4, [0 x i256]* %0, i256* %5, i256* %6)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %4, [0 x i256]* %0, i256* %5, i256* %6)
 //CHECK-NEXT:   %7 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* %9)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* %9)
 //CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
 //CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12)
 //CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/vanguard-uc-comp.circom b/circom/tests/loops/vanguard-uc-comp.circom
index 02891ff8c..b67e6c122 100644
--- a/circom/tests/loops/vanguard-uc-comp.circom
+++ b/circom/tests/loops/vanguard-uc-comp.circom
@@ -24,11 +24,9 @@ component main = Num2Bits(2);
 // %lvars =  [ n, lc1, e2, i ]
 // %subcmps = []
 //
-// NOTE: The order of `fixed*` parameters corresponding to use sites in the body is non-deterministic.
-//
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0, i256* %fixed_1, i256* %fixed_2, i256* %fixed_3){{.*}} {
-//CHECK-NEXT: ..generated..loop.body.{{.*}}[[$F_ID]]:
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
@@ -38,14 +36,14 @@ component main = Num2Bits(2);
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
 //CHECK-NEXT:   %call.fr_bit_and = call i256 @fr_bit_and(i256 %call.fr_shr, i256 1)
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X1]], i32 0
 //CHECK-NEXT:   store i256 %call.fr_bit_and, i256* %4, align 4
 //CHECK-NEXT:   br label %assert2
 //CHECK-EMPTY: 
 //CHECK-NEXT: assert2:
-//CHECK-NEXT:   %5 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X2]], i32 0
 //CHECK-NEXT:   %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:   %7 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
 //CHECK-NEXT:   %8 = load i256, i256* %7, align 4
 //CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 1)
 //CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %6, i256 %call.fr_sub)
@@ -58,7 +56,7 @@ component main = Num2Bits(2);
 //CHECK-NEXT: store3:
 //CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
 //CHECK-NEXT:   %10 = load i256, i256* %9, align 4
-//CHECK-NEXT:   %11 = getelementptr i256, i256* %fixed_{{.*}}, i32 0
+//CHECK-NEXT:   %11 = getelementptr i256, i256* %fix_[[X4]], i32 0
 //CHECK-NEXT:   %12 = load i256, i256* %11, align 4
 //CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
 //CHECK-NEXT:   %14 = load i256, i256* %13, align 4
@@ -98,13 +96,13 @@ component main = Num2Bits(2);
 //CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
 //CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %6, i256* %7, i256* %8, i256* %9)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %6, i256* %7, i256* %8, i256* %9)
 //CHECK-NEXT:   %10 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
 //CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12, i256* %13, i256* %14)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12, i256* %13, i256* %14)
 //CHECK-NEXT:   br label %assert{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: assert{{[0-9]+}}:
diff --git a/circom/tests/loops/variant_idx_in_loop_A.circom b/circom/tests/loops/variant_idx_in_loop_A.circom
index 76253588f..b35250f72 100644
--- a/circom/tests/loops/variant_idx_in_loop_A.circom
+++ b/circom/tests/loops/variant_idx_in_loop_A.circom
@@ -18,14 +18,14 @@ component main = VariantIndex(2);
 // %subcmps = []
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 2
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   store i256 %call.fr_shr, i256* %4, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -46,8 +46,8 @@ component main = VariantIndex(2);
 //CHECK:      unrolled_loop{{[0-9]+}}:
 //CHECK-NEXT:   %3 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %3, [0 x i256]* %0, i256* %4)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %3, [0 x i256]* %0, i256* %4)
 //CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %6)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %6)
 //CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/variant_idx_in_loop_B.circom b/circom/tests/loops/variant_idx_in_loop_B.circom
index 35750e5ba..b85223b1d 100644
--- a/circom/tests/loops/variant_idx_in_loop_B.circom
+++ b/circom/tests/loops/variant_idx_in_loop_B.circom
@@ -20,14 +20,14 @@ component main = VariantIndex(2);
 // %subcmps = []
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fixed_0){{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
 //CHECK:      store{{[0-9]+}}:
 //CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
 //CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %fixed_0, i32 0
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_0, i32 0
 //CHECK-NEXT:   store i256 %call.fr_shr, i256* %4, align 4
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
@@ -49,11 +49,11 @@ component main = VariantIndex(2);
 //CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %6, i32 0, i256 1
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %7)
 //CHECK-NEXT:   %8 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %9 = bitcast [4 x i256]* %lvars to [0 x i256]*
 //CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 2
-//CHECK-NEXT:   call void @..generated..loop.body.{{.*}}[[$F_ID]]([0 x i256]* %8, [0 x i256]* %0, i256* %10)
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %8, [0 x i256]* %0, i256* %10)
 //CHECK-NEXT:   br label %store{{[0-9]+}}
 //CHECK-EMPTY: 
 //CHECK-NEXT: store{{[0-9]+}}:
diff --git a/circom/tests/subcmps/subcmps1.circom b/circom/tests/subcmps/subcmps1.circom
index 75d73fd60..97d947f29 100644
--- a/circom/tests/subcmps/subcmps1.circom
+++ b/circom/tests/subcmps/subcmps1.circom
@@ -1,11 +1,10 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
-// XFAIL:.*
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template IsZero() {
-    signal input in;
-    signal output out;
+    signal input in;        // subcmp signal 1
+    signal output out;      // subcmp signal 0
 
     signal inv;
 
@@ -29,4 +28,121 @@ template SubCmps1(n) {
     }
 }
 
-component main = SubCmps1(2);
\ No newline at end of file
+component main = SubCmps1(3);
+
+// %0 (i.e. signal arena) = [ outs[0], outs[1], outs[2], ins[0], ins[1], ins[2] ]
+// %lvars =  [ n, i ]
+// %subcmps = [ IsZero[0]{signals=[out,in,inv]}, IsZero[1]{SAME}, IsZero[2]{SAME} ]
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %[[X1:subfix_[0-9]+]], i256* %[[X2:fix_[0-9]+]], i256* %[[X3:fix_[0-9]+]], i256* %[[X4:subfix_[0-9]+]],
+//CHECK-SAME: [0 x i256]* %[[X5:sub_[0-9]+]], i256* %[[X6:subc_[0-9]+]], [0 x i256]* %[[X7:sub_[0-9]+]], i256* %[[X8:subc_[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %1, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %[[X5]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %[[X5]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %[[X4]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %[[X3]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %constraint1 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %6, i256 %8, i1* %constraint1)
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %11, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps1_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop5:
+//CHECK-NEXT:   %7 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %9 = load [0 x i256]*, [0 x i256]** %8, align 8
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 1
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %14 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %15 = load [0 x i256]*, [0 x i256]** %14, align 8
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %15, i32 0
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %16, i32 0, i256 0
+//CHECK-NEXT:   %18 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %19 = load [0 x i256]*, [0 x i256]** %18, align 8
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0
+//CHECK-NEXT:   %21 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %22 = bitcast i32* %21 to i256*
+//CHECK-NEXT:   %23 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %24 = load [0 x i256]*, [0 x i256]** %23, align 8
+//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %24, i32 0
+//CHECK-NEXT:   %26 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %27 = bitcast i32* %26 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %7, [0 x i256]* %0, i256* %11, i256* %12, i256* %13, i256* %17, [0 x i256]* %20, i256* %22, [0 x i256]* %25, i256* %27)
+//CHECK-NEXT:   %28 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %29 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %30 = load [0 x i256]*, [0 x i256]** %29, align 8
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %30, i32 0
+//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %31, i32 0, i256 1
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %35 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %36 = load [0 x i256]*, [0 x i256]** %35, align 8
+//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %36, i32 0
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %37, i32 0, i256 0
+//CHECK-NEXT:   %39 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %40 = load [0 x i256]*, [0 x i256]** %39, align 8
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %40, i32 0
+//CHECK-NEXT:   %42 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %43 = bitcast i32* %42 to i256*
+//CHECK-NEXT:   %44 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %45 = load [0 x i256]*, [0 x i256]** %44, align 8
+//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %45, i32 0
+//CHECK-NEXT:   %47 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %48 = bitcast i32* %47 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %28, [0 x i256]* %0, i256* %32, i256* %33, i256* %34, i256* %38, [0 x i256]* %41, i256* %43, [0 x i256]* %46, i256* %48)
+//CHECK-NEXT:   %49 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %50 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %51 = load [0 x i256]*, [0 x i256]** %50, align 8
+//CHECK-NEXT:   %52 = getelementptr [0 x i256], [0 x i256]* %51, i32 0
+//CHECK-NEXT:   %53 = getelementptr [0 x i256], [0 x i256]* %52, i32 0, i256 1
+//CHECK-NEXT:   %54 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %56 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %57 = load [0 x i256]*, [0 x i256]** %56, align 8
+//CHECK-NEXT:   %58 = getelementptr [0 x i256], [0 x i256]* %57, i32 0
+//CHECK-NEXT:   %59 = getelementptr [0 x i256], [0 x i256]* %58, i32 0, i256 0
+//CHECK-NEXT:   %60 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %61 = load [0 x i256]*, [0 x i256]** %60, align 8
+//CHECK-NEXT:   %62 = getelementptr [0 x i256], [0 x i256]* %61, i32 0
+//CHECK-NEXT:   %63 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %64 = bitcast i32* %63 to i256*
+//CHECK-NEXT:   %65 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %66 = load [0 x i256]*, [0 x i256]** %65, align 8
+//CHECK-NEXT:   %67 = getelementptr [0 x i256], [0 x i256]* %66, i32 0
+//CHECK-NEXT:   %68 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %69 = bitcast i32* %68 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %49, [0 x i256]* %0, i256* %53, i256* %54, i256* %55, i256* %59, [0 x i256]* %62, i256* %64, [0 x i256]* %67, i256* %69)
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/subcmps/subcmps2.circom b/circom/tests/subcmps/subcmps2.circom
index 9dbffd486..a3792cdb4 100644
--- a/circom/tests/subcmps/subcmps2.circom
+++ b/circom/tests/subcmps/subcmps2.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.6;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 // XFAIL:.*
 
 template Sum(n) {
@@ -42,3 +42,219 @@ component main = Caller();
 //CHECK: %[[SUBCMP:.*]] = load [0 x i256]*, [0 x i256]** %[[SUBCMP_PTR]]
 //CHECK: %[[SUBCMP_INP:.*]] = getelementptr [0 x i256], [0 x i256]* %[[SUBCMP]], i32 0, i32 {{[1-4]}}
 //CHECK: store i256 %[[CALL_VAL]], i256* %[[SUBCMP_INP]]
+
+/*
+define void @Sum_0_build({ [0 x i256]*, i32 }* %0) !dbg !9 {
+main:
+  %1 = alloca [5 x i256], align 8
+  %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+  store i32 4, i32* %2, align 4
+  %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+  %4 = bitcast [5 x i256]* %1 to [0 x i256]*
+  store [0 x i256]* %4, [0 x i256]** %3, align 8
+  ret void
+}
+
+define void @Sum_0_run([0 x i256]* %0) !dbg !11 {
+prelude:
+  %lvars = alloca [3 x i256], align 8
+  %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+  br label %store1
+
+store1:
+  %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+  store i256 4, i256* %1, align 4
+  br label %store2
+
+store2:
+  %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 0, i256* %2, align 4
+  br label %store3
+
+store3:
+  %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 0, i256* %3, align 4
+  br label %unrolled_loop4
+
+unrolled_loop4:
+  %4 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %5 = load i256, i256* %4, align 4
+  %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+  %7 = load i256, i256* %6, align 4
+  %call.fr_add = call i256 @fr_add(i256 %5, i256 %7)
+  %8 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 %call.fr_add, i256* %8, align 4
+  %9 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 1, i256* %9, align 4
+  %10 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %11 = load i256, i256* %10, align 4
+  %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+  %13 = load i256, i256* %12, align 4
+  %call.fr_add1 = call i256 @fr_add(i256 %11, i256 %13)
+  %14 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 %call.fr_add1, i256* %14, align 4
+  %15 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 2, i256* %15, align 4
+  %16 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %17 = load i256, i256* %16, align 4
+  %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 3
+  %19 = load i256, i256* %18, align 4
+  %call.fr_add2 = call i256 @fr_add(i256 %17, i256 %19)
+  %20 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 %call.fr_add2, i256* %20, align 4
+  %21 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 3, i256* %21, align 4
+  %22 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %23 = load i256, i256* %22, align 4
+  %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 4
+  %25 = load i256, i256* %24, align 4
+  %call.fr_add3 = call i256 @fr_add(i256 %23, i256 %25)
+  %26 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 %call.fr_add3, i256* %26, align 4
+  %27 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 4, i256* %27, align 4
+  br label %store5
+
+store5:
+  %28 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %29 = load i256, i256* %28, align 4
+  %30 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+  store i256 %29, i256* %30, align 4
+  %31 = load i256, i256* %30, align 4
+  %constraint = alloca i1, align 1
+  call void @__constraint_values(i256 %29, i256 %31, i1* %constraint)
+  br label %prologue
+
+prologue:
+  ret void
+}
+
+define void @Caller_1_build({ [0 x i256]*, i32 }* %0) !dbg !18 {
+main:
+  %1 = alloca [5 x i256], align 8
+  %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+  store i32 4, i32* %2, align 4
+  %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+  %4 = bitcast [5 x i256]* %1 to [0 x i256]*
+  store [0 x i256]* %4, [0 x i256]** %3, align 8
+  ret void
+}
+
+define void @Caller_1_run([0 x i256]* %0) !dbg !20 {
+prelude:
+  %lvars = alloca [1 x i256], align 8
+  %subcmps = alloca [1 x { [0 x i256]*, i32 }], align 8
+  br label %create_cmp1
+
+create_cmp1:
+  %1 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+  call void @Sum_0_build({ [0 x i256]*, i32 }* %1)
+  br label %store2
+
+store2:
+  %2 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 0, i256* %2, align 4
+  br label %unrolled_loop3
+
+unrolled_loop3:
+  %nop_0_arena = alloca [1 x i256], align 8
+  %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+  %4 = load i256, i256* %3, align 4
+  %5 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena, i32 0, i32 0
+  store i256 %4, i256* %5, align 4
+  %6 = bitcast [1 x i256]* %nop_0_arena to i256*
+  %call.nop_0 = call i256 @nop_0(i256* %6)
+  %7 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+  %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 1
+  store i256 %call.nop_0, i256* %9, align 4
+  %10 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+  %load.subcmp.counter = load i32, i32* %10, align 4
+  %decrement.counter = sub i32 %load.subcmp.counter, 1
+  store i32 %decrement.counter, i32* %10, align 4
+  %11 = load i256, i256* %9, align 4
+  %constraint = alloca i1, align 1
+  call void @__constraint_values(i256 %call.nop_0, i256 %11, i1* %constraint)
+  %12 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 1, i256* %12, align 4
+  %nop_0_arena1 = alloca [1 x i256], align 8
+  %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+  %14 = load i256, i256* %13, align 4
+  %15 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena1, i32 0, i32 0
+  store i256 %14, i256* %15, align 4
+  %16 = bitcast [1 x i256]* %nop_0_arena1 to i256*
+  %call.nop_02 = call i256 @nop_0(i256* %16)
+  %17 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %18 = load [0 x i256]*, [0 x i256]** %17, align 8
+  %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i32 2
+  store i256 %call.nop_02, i256* %19, align 4
+  %20 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+  %load.subcmp.counter3 = load i32, i32* %20, align 4
+  %decrement.counter4 = sub i32 %load.subcmp.counter3, 1
+  store i32 %decrement.counter4, i32* %20, align 4
+  %21 = load i256, i256* %19, align 4
+  %constraint5 = alloca i1, align 1
+  call void @__constraint_values(i256 %call.nop_02, i256 %21, i1* %constraint5)
+  %22 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 2, i256* %22, align 4
+  %nop_0_arena6 = alloca [1 x i256], align 8
+  %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 3
+  %24 = load i256, i256* %23, align 4
+  %25 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena6, i32 0, i32 0
+  store i256 %24, i256* %25, align 4
+  %26 = bitcast [1 x i256]* %nop_0_arena6 to i256*
+  %call.nop_07 = call i256 @nop_0(i256* %26)
+  %27 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %28 = load [0 x i256]*, [0 x i256]** %27, align 8
+  %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0, i32 3
+  store i256 %call.nop_07, i256* %29, align 4
+  %30 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+  %load.subcmp.counter8 = load i32, i32* %30, align 4
+  %decrement.counter9 = sub i32 %load.subcmp.counter8, 1
+  store i32 %decrement.counter9, i32* %30, align 4
+  %31 = load i256, i256* %29, align 4
+  %constraint10 = alloca i1, align 1
+  call void @__constraint_values(i256 %call.nop_07, i256 %31, i1* %constraint10)
+  %32 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 3, i256* %32, align 4
+  %nop_0_arena11 = alloca [1 x i256], align 8
+  %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 4
+  %34 = load i256, i256* %33, align 4
+  %35 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena11, i32 0, i32 0
+  store i256 %34, i256* %35, align 4
+  %36 = bitcast [1 x i256]* %nop_0_arena11 to i256*
+  %call.nop_012 = call i256 @nop_0(i256* %36)
+  %37 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %38 = load [0 x i256]*, [0 x i256]** %37, align 8
+  %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0, i32 4
+  store i256 %call.nop_012, i256* %39, align 4
+  %40 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+  %load.subcmp.counter13 = load i32, i32* %40, align 4
+  %decrement.counter14 = sub i32 %load.subcmp.counter13, 1
+  store i32 %decrement.counter14, i32* %40, align 4
+  %41 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %42 = load [0 x i256]*, [0 x i256]** %41, align 8
+  call void @Sum_0_run([0 x i256]* %42)
+  %43 = load i256, i256* %39, align 4
+  %constraint15 = alloca i1, align 1
+  call void @__constraint_values(i256 %call.nop_012, i256 %43, i1* %constraint15)
+  %44 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 4, i256* %44, align 4
+  br label %store4
+
+store4:
+  %45 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %46 = load [0 x i256]*, [0 x i256]** %45, align 8
+  %47 = getelementptr [0 x i256], [0 x i256]* %46, i32 0, i32 0
+  %48 = load i256, i256* %47, align 4
+  %49 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+  store i256 %48, i256* %49, align 4
+  %50 = load i256, i256* %49, align 4
+  %constraint16 = alloca i1, align 1
+  call void @__constraint_values(i256 %48, i256 %50, i1* %constraint16)
+  br label %prologue
+
+prologue:
+  ret void
+}
+*/
diff --git a/circuit_passes/Cargo.toml b/circuit_passes/Cargo.toml
index c2ce7ac77..c0a9b2697 100644
--- a/circuit_passes/Cargo.toml
+++ b/circuit_passes/Cargo.toml
@@ -13,3 +13,4 @@ code_producers = {path = "../code_producers"}
 intervallum = "1.4.0"
 circom_algebra = {path = "../circom_algebra"}
 const_format = "0.2.31"
+indexmap = "2.0.0"
diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index 9a8bb2bab..c8d1a3b63 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -1,22 +1,31 @@
 use std::cell::Ref;
-use std::collections::HashMap;
+use std::collections::{HashMap, BTreeMap};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
+use compiler::intermediate_representation::Instruction;
+use compiler::intermediate_representation::ir_interface::{AddressType, ValueBucket, ValueType};
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
+use crate::passes::loop_unroll::body_extractor::ToOriginalLocation;
 use super::{Env, LibraryAccess};
 
-/// This Env is used to process functions created when extracting loop bodies into
-/// `LOOP_BODY_FN_PREFIX` functions.
+/// This Env is used to process functions created by extracting loop bodies
+/// into 'LOOP_BODY_FN_PREFIX' functions. It has to interpret the references
+/// produced by ExtractedFunctionLocationUpdater (i.e. some loads and stores
+/// are converted to AddressType::SubcmpSignal references that indicate which
+/// function parameter holds the necessary data).
 #[derive(Clone)]
 pub struct ExtractedFuncEnvData<'a> {
     base: Box<Env<'a>>,
+    remap: ToOriginalLocation,
 }
 
 impl Display for ExtractedFuncEnvData<'_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
-        self.base.fmt(f)
+        write!(f, "ExtractedFuncEnv{{")?;
+        self.base.fmt(f)?;
+        write!(f, "}}")
     }
 }
 
@@ -30,97 +39,272 @@ impl LibraryAccess for ExtractedFuncEnvData<'_> {
     }
 }
 
+// All subcomponent lookups need to use the map from loop unrolling to convert the
+//  AddressType::SubcmpSignal references created by ExtractedFunctionLocationUpdater
+//  back into the proper reference to access the correct Env entry.
 impl<'a> ExtractedFuncEnvData<'a> {
-    pub fn new(inner: Env<'a>) -> Self {
-        ExtractedFuncEnvData { base: Box::new(inner) }
+    pub fn new(inner: Env<'a>, remap: ToOriginalLocation) -> Self {
+        ExtractedFuncEnvData { base: Box::new(inner), remap }
+    }
+
+    pub fn get_base(self) -> Env<'a> {
+        *self.base
     }
 
     pub fn get_var(&self, idx: usize) -> Value {
-        println!("get_var({}) = {}", idx, self.base.get_var(idx));
+        // Local variables are referenced in the normal way
         self.base.get_var(idx)
     }
 
     pub fn get_signal(&self, idx: usize) -> Value {
-        println!("get_signal({}) = {}", idx, self.base.get_signal(idx));
+        // Signals are referenced in the normal way
         self.base.get_signal(idx)
     }
 
     pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
-        //NOTE: `signal_idx` will always be 0 for the fixed* parameters
-        assert_eq!(signal_idx, 0);
-        println!("TODO: must handle args here in addition to subcomps");
-        // self.base.get_subcmp_signal(subcmp_idx, signal_idx)
-        Value::Unknown
+        let res = match self.remap.get(&subcmp_idx) {
+            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            Some((loc, idx)) => {
+                //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                //  the LocationRule that 'signal_idx' is computed from.
+                assert_eq!(signal_idx, 0);
+                match loc {
+                    AddressType::Variable => self.base.get_var(*idx),
+                    AddressType::Signal => self.base.get_signal(*idx),
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match **cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
+                        self.base.get_subcmp_signal(subcmp, *idx)
+                    }
+                }
+            }
+        };
+        println!("[FINDME] get_subcmp_signal({subcmp_idx}, {signal_idx}) = {} in {}", res, self);
+        res
     }
 
     pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
-        todo!();
-        self.base.get_subcmp_name(subcmp_idx)
+        match self.remap.get(&subcmp_idx) {
+            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            Some((loc, idx)) => {
+                match loc {
+                    AddressType::Variable => self.base.get_subcmp_name(*idx),
+                    AddressType::Signal => self.base.get_subcmp_name(*idx),
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match **cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
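+                        //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                        //  the LocationRule that 'signal_idx' is computed from. NOTE(review): there is no 'signal_idx' here; the check below is on the remapped 'idx' - confirm this is intended.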
+                        assert_eq!(*idx, 0);
+                        self.base.get_subcmp_name(subcmp)
+                    }
+                }
+            }
+        }
     }
 
     pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
-        todo!();
-        self.base.get_subcmp_template_id(subcmp_idx)
+        match self.remap.get(&subcmp_idx) {
+            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            Some((loc, idx)) => {
+                match loc {
+                    AddressType::Variable => self.base.get_subcmp_template_id(*idx),
+                    AddressType::Signal => self.base.get_subcmp_template_id(*idx),
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match **cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
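+                        //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                        //  the LocationRule that 'signal_idx' is computed from. NOTE(review): there is no 'signal_idx' here; the check below is on the remapped 'idx' - confirm this is intended.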
+                        assert_eq!(*idx, 0);
+                        self.base.get_subcmp_template_id(subcmp)
+                    }
+                }
+            }
+        }
     }
 
     pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
-        todo!();
-        self.base.subcmp_counter_is_zero(subcmp_idx)
+        let res = match self.remap.get(&subcmp_idx).cloned() {
+            //TODO: Is this None case being hit by a pre-existing subcmp at index 0 reference? I think so. Can I verify?
+            //  All subcmp refs in extracted body should have been replaced with refs to a subfix parameter... right?
+            //OBS: It happens because there will be an Unknown counter when certain loop bodies are extracted to a function.
+            //  That means I do need to add the code to decrement counters inside the loop and let StoreBucket generate
+            //  the counter checks that will determine when to execute the "run" function at runtime.
+            None => todo!(), //false, // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            Some((loc, _)) => {
+                match loc {
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match *cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
+                        self.base.subcmp_counter_is_zero(subcmp)
+                    }
+                    _ => false, // no counter for Variable/Signal types
+                }
+            }
+        };
+        println!("[FINDME] subcmp_counter_is_zero({subcmp_idx}) = {} in {}", res, self);
+        res
     }
 
     pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
-        todo!();
-        self.base.subcmp_counter_equal_to(subcmp_idx, value)
+        let res = match self.remap.get(&subcmp_idx).cloned() {
+            None => todo!(), //false, // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            Some((loc, _)) => {
+                match loc {
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match *cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
+                        self.base.subcmp_counter_equal_to(subcmp, value)
+                    }
+                    _ => false, // no counter for Variable/Signal types
+                }
+            }
+        };
+        println!("[FINDME] subcmp_counter_equal_to({subcmp_idx}, {value}) = {} in {}", res, self);
+        res
     }
 
     pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
-        todo!();
         self.base.get_vars_clone()
     }
 
+    pub fn get_vars_sort(&self) -> BTreeMap<usize, Value> {
+        self.base.get_vars_sort()
+    }
+
     pub fn set_var(self, idx: usize, value: Value) -> Self {
-        println!("set_var({}, {}), old = {}", idx, value, self.base.get_var(idx));
-        ExtractedFuncEnvData { base: Box::new(self.base.set_var(idx, value)) }
+        // Local variables are referenced in the normal way
+        ExtractedFuncEnvData { base: Box::new(self.base.set_var(idx, value)), remap: self.remap }
     }
 
     pub fn set_signal(self, idx: usize, value: Value) -> Self {
-        println!("set_signal({}, {}), old = {}", idx, value, self.base.get_signal(idx));
-        ExtractedFuncEnvData { base: Box::new(self.base.set_signal(idx, value)) }
+        // Signals are referenced in the normal way
+        ExtractedFuncEnvData { base: Box::new(self.base.set_signal(idx, value)), remap: self.remap }
     }
 
     pub fn set_all_to_unk(self) -> Self {
-        todo!();
-        ExtractedFuncEnvData { base: Box::new(self.base.set_all_to_unk()) }
+        // Local variables are referenced in the normal way
+        ExtractedFuncEnvData { base: Box::new(self.base.set_all_to_unk()), remap: self.remap }
     }
 
     pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
-        todo!();
-        ExtractedFuncEnvData { base: Box::new(self.base.set_subcmp_to_unk(subcmp_idx)) }
+        todo!("set_subcmp_to_unk({})", subcmp_idx);
+        // ExtractedFuncEnvData {
+        //     base: Box::new(self.base.set_subcmp_to_unk(subcmp_idx)),
+        //     remap: self.remap,
+        // }
     }
 
     pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
-        todo!();
-        ExtractedFuncEnvData {
-            base: Box::new(self.base.set_subcmp_signal(subcmp_idx, signal_idx, value)),
-        }
+        let temp_str_self = format!("{}", self);
+        let temp_str_value = format!("{}", value);
+        //NOTE: This is only called by BucketInterpreter::store_value_in_address.
+        //Use the map from loop unrolling to convert the SubcmpSignal reference back
+        //  into the proper reference (reversing ExtractedFunctionLocationUpdater).
+        let new_env = match self.remap.get(&subcmp_idx).cloned() {
+            //NOTE: The ArgIndex::SubCmp 'arena' and 'counter' parameters were not added
+            //  to the 'remap' (producing None result here) because those parameters are
+            //  not actually used to access signals, just to call _run and update counter.
+            None => *self.base,
+            Some((loc, idx)) => {
+                //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                //  the LocationRule that 'signal_idx' is computed from.
+                assert_eq!(signal_idx, 0);
+                match loc {
+                    AddressType::Variable => self.base.set_var(idx, value),
+                    AddressType::Signal => self.base.set_signal(idx, value),
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match *cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
+                        self.base.set_subcmp_signal(subcmp, idx, value)
+                    }
+                }
+            }
+        };
+        println!(
+            "[FINDME] set_subcmp_signal({subcmp_idx}, {signal_idx}, {})\n BEFORE: {}\n AFTER: {}",
+            temp_str_value, temp_str_self, new_env
+        );
+        ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap }
     }
 
     pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
-        todo!();
-        ExtractedFuncEnvData { base: Box::new(self.base.decrease_subcmp_counter(subcmp_idx)) }
+        let temp_str_self = format!("{}", self);
+        let new_env = match self.remap.get(&subcmp_idx).cloned() {
+            //NOTE: The ArgIndex::SubCmp 'arena' and 'counter' parameters were not added
+            //  to the 'remap' (producing None result here) because those parameters are
+            //  not actually used to access signals, just to call _run and update counter.
+            //  No counter update needed when SubcmpSignal is used for these special cases.
+            None => *self.base,
+            Some((loc, _)) => {
+                match loc {
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match *cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
+                        self.base.decrease_subcmp_counter(subcmp)
+                    }
+                    _ => *self.base, // no counter for Variable/Signal types
+                }
+            }
+        };
+        println!(
+            "[FINDME] decrease_subcmp_counter({subcmp_idx})\n BEFORE: {}\n AFTER: {}",
+            temp_str_self, new_env
+        );
+        ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap }
     }
 
     pub fn run_subcmp(
         self,
-        subcmp_idx: usize,
-        name: &String,
-        interpreter: &BucketInterpreter,
-        observe: bool,
+        _subcmp_idx: usize,
+        _name: &String,
+        _interpreter: &BucketInterpreter,
+        _observe: bool,
     ) -> Self {
-        todo!();
-        ExtractedFuncEnvData {
-            base: Box::new(self.base.run_subcmp(subcmp_idx, name, interpreter, observe)),
-        }
+        //Return self just like the StandardEnvData
+        self
     }
 
     pub fn create_subcmp(
@@ -130,9 +314,10 @@ impl<'a> ExtractedFuncEnvData<'a> {
         count: usize,
         template_id: usize,
     ) -> Self {
-        todo!();
-        ExtractedFuncEnvData {
-            base: Box::new(self.base.create_subcmp(name, base_index, count, template_id)),
-        }
+        todo!("create_subcmp({name},{base_index},{count},{template_id})");
+        // ExtractedFuncEnvData {
+        //     base: Box::new(self.base.create_subcmp(name, base_index, count, template_id)),
+        //     remap: self.remap,
+        // }
     }
 }
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
index a623cc49d..30e729888 100644
--- a/circuit_passes/src/bucket_interpreter/env/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -1,11 +1,11 @@
 use std::cell::Ref;
-use std::collections::HashMap;
+use std::collections::{HashMap, BTreeMap};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::loop_unroll::body_extractor::LoopBodyExtractor;
+use crate::passes::loop_unroll::body_extractor::{LoopBodyExtractor, ToOriginalLocation};
 use self::extracted_func_env::ExtractedFuncEnvData;
 use self::standard_env::StandardEnvData;
 use self::unrolled_block_env::UnrolledBlockEnvData;
@@ -108,8 +108,15 @@ impl<'a> Env<'a> {
         Env::UnrolledBlock(UnrolledBlockEnvData::new(inner, extractor))
     }
 
-    pub fn new_extracted_func_env(inner: Env<'a>) -> Self {
-        Env::ExtractedFunction(ExtractedFuncEnvData::new(inner))
+    pub fn new_extracted_func_env(inner: Env<'a>, remap: ToOriginalLocation) -> Self {
+        Env::ExtractedFunction(ExtractedFuncEnvData::new(inner, remap))
+    }
+
+    pub fn peel_extracted_func(self) -> Self {
+        match self {
+            Env::ExtractedFunction(d) => d.get_base(),
+            _ => self,
+        }
     }
 
     // READ OPERATIONS
@@ -177,6 +184,14 @@ impl<'a> Env<'a> {
         }
     }
 
+    pub fn get_vars_sort(&self) -> BTreeMap<usize, Value> {
+        match self {
+            Env::Standard(d) => d.get_vars_sort(),
+            Env::UnrolledBlock(d) => d.get_vars_sort(),
+            Env::ExtractedFunction(d) => d.get_vars_sort(),
+        }
+    }
+
     // WRITE OPERATIONS
     pub fn set_var(self, idx: usize, value: Value) -> Self {
         match self {
diff --git a/circuit_passes/src/bucket_interpreter/env/standard_env.rs b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
index b863896d5..ab211aa02 100644
--- a/circuit_passes/src/bucket_interpreter/env/standard_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
@@ -1,5 +1,5 @@
 use std::cell::Ref;
-use std::collections::HashMap;
+use std::collections::{HashMap, BTreeMap};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
@@ -9,17 +9,17 @@ use super::{SubcmpEnv, LibraryAccess};
 
 #[derive(Clone)]
 pub struct StandardEnvData<'a> {
-    pub vars: HashMap<usize, Value>,
-    pub signals: HashMap<usize, Value>,
-    pub subcmps: HashMap<usize, SubcmpEnv>,
-    pub libs: &'a dyn LibraryAccess,
+    vars: HashMap<usize, Value>,
+    signals: HashMap<usize, Value>,
+    subcmps: HashMap<usize, SubcmpEnv>,
+    libs: &'a dyn LibraryAccess,
 }
 
 impl Display for StandardEnvData<'_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
         write!(
             f,
-            "\n  vars = {:?}\n  signals = {:?}\n  subcmps = {:?}",
+            "StandardEnv{{\n  vars = {:?}\n  signals = {:?}\n  subcmps = {:?}}}",
             self.vars, self.signals, self.subcmps
         )
     }
@@ -78,6 +78,13 @@ impl<'a> StandardEnvData<'a> {
         self.vars.clone()
     }
 
+    pub fn get_vars_sort(&self) -> BTreeMap<usize, Value> {
+        self.vars.iter().fold(BTreeMap::new(), |mut acc, e| {
+            acc.insert(*e.0, e.1.clone());
+            acc
+        })
+    }
+
     // WRITE OPERATIONS
     pub fn set_var(self, idx: usize, value: Value) -> Self {
         let mut copy = self;
diff --git a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
index 08f3f112e..9b4b3dae7 100644
--- a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
@@ -1,9 +1,8 @@
 use std::cell::Ref;
-use std::collections::HashMap;
+use std::collections::{HashMap, BTreeMap};
 use std::fmt::{Display, Formatter, Result};
 use compiler::circuit_design::function::FunctionCode;
 use compiler::circuit_design::template::TemplateCode;
-
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::value::Value;
 use crate::passes::LOOP_BODY_FN_PREFIX;
@@ -22,7 +21,9 @@ pub struct UnrolledBlockEnvData<'a> {
 
 impl Display for UnrolledBlockEnvData<'_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> Result {
-        self.base.fmt(f)
+        write!(f, "UnrolledBlockEnv{{")?;
+        self.base.fmt(f)?;
+        write!(f, "}}")
     }
 }
 
@@ -81,6 +82,10 @@ impl<'a> UnrolledBlockEnvData<'a> {
         self.base.get_vars_clone()
     }
 
+    pub fn get_vars_sort(&self) -> BTreeMap<usize, Value> {
+        self.base.get_vars_sort()
+    }
+
     pub fn set_var(self, idx: usize, value: Value) -> Self {
         UnrolledBlockEnvData {
             base: Box::new(self.base.set_var(idx, value)),
diff --git a/circuit_passes/src/bucket_interpreter/memory.rs b/circuit_passes/src/bucket_interpreter/memory.rs
index 61b08ffbc..368474491 100644
--- a/circuit_passes/src/bucket_interpreter/memory.rs
+++ b/circuit_passes/src/bucket_interpreter/memory.rs
@@ -9,6 +9,7 @@ use compiler::compiler_interface::Circuit;
 use crate::bucket_interpreter::BucketInterpreter;
 use crate::bucket_interpreter::env::{Env, LibraryAccess};
 use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::passes::GlobalPassData;
 
 pub struct PassMemory {
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need
@@ -26,9 +27,9 @@ pub struct PassMemory {
 }
 
 impl PassMemory {
-    pub fn new(prime: &String, current_scope: String, io_map: TemplateInstanceIOMap) -> Self {
+    pub fn new(prime: String, current_scope: String, io_map: TemplateInstanceIOMap) -> Self {
         PassMemory {
-            prime: prime.to_string(),
+            prime,
             current_scope: RefCell::new(current_scope),
             io_map: RefCell::new(io_map),
             constant_fields: Default::default(),
@@ -40,31 +41,42 @@ impl PassMemory {
         }
     }
 
-    pub fn build_interpreter<'a>(
+    pub fn build_interpreter<'a, 'd: 'a>(
         &'a self,
+        global_data: &'d RefCell<GlobalPassData>,
         observer: &'a dyn InterpreterObserver,
     ) -> BucketInterpreter {
-        self.build_interpreter_with_scope(observer, self.current_scope.borrow().to_string())
+        self.build_interpreter_with_scope(
+            global_data,
+            observer,
+            self.current_scope.borrow().to_string(),
+        )
     }
 
-    pub fn build_interpreter_with_scope<'a>(
+    pub fn build_interpreter_with_scope<'a, 'd: 'a>(
         &'a self,
+        global_data: &'d RefCell<GlobalPassData>,
         observer: &'a dyn InterpreterObserver,
         scope: String,
     ) -> BucketInterpreter {
-        BucketInterpreter::init(observer, self, scope)
+        BucketInterpreter::init(global_data, observer, self, scope)
     }
 
     pub fn set_scope(&self, template: &TemplateCode) {
         self.current_scope.replace(template.header.clone());
     }
 
-    pub fn run_template(&self, observer: &dyn InterpreterObserver, template: &TemplateCode) {
+    pub fn run_template<'d>(
+        &self,
+        global_data: &'d RefCell<GlobalPassData>,
+        observer: &dyn InterpreterObserver,
+        template: &TemplateCode,
+    ) {
         assert!(!self.current_scope.borrow().is_empty());
         if cfg!(debug_assertions) {
             println!("Running template {}", self.current_scope.borrow());
         }
-        let interpreter = self.build_interpreter(observer);
+        let interpreter = self.build_interpreter(global_data, observer);
         let env = Env::new_standard_env(self);
         interpreter.execute_instructions(&template.body, env, true);
     }
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 906517527..4afef5331 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -4,6 +4,7 @@ pub mod memory;
 pub mod observer;
 pub(crate) mod operations;
 
+use std::cell::RefCell;
 use std::vec;
 use circom_algebra::modular_arithmetic;
 use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
@@ -15,13 +16,13 @@ use program_structure::constants::UsefulConstants;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::operations::compute_offset;
-use crate::bucket_interpreter::value::Value;
-use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
-use crate::passes::LOOP_BODY_FN_PREFIX;
+use crate::bucket_interpreter::value::Value::{self, KnownBigInt, KnownU32, Unknown};
+use crate::passes::{LOOP_BODY_FN_PREFIX, GlobalPassData};
 use self::env::LibraryAccess;
 
-pub struct BucketInterpreter<'a> {
-    pub(crate) observer: &'a dyn InterpreterObserver,
+pub struct BucketInterpreter<'a, 'd> {
+    global_data: &'d RefCell<GlobalPassData>,
+    observer: &'a dyn InterpreterObserver,
     mem: &'a PassMemory,
     scope: String,
     p: BigInt,
@@ -29,9 +30,15 @@ pub struct BucketInterpreter<'a> {
 
 pub type R<'a> = (Option<Value>, Env<'a>);
 
-impl<'a> BucketInterpreter<'a> {
-    pub fn init(observer: &'a dyn InterpreterObserver, mem: &'a PassMemory, scope: String) -> Self {
+impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
+    pub fn init(
+        global_data: &'d RefCell<GlobalPassData>,
+        observer: &'a dyn InterpreterObserver,
+        mem: &'a PassMemory,
+        scope: String,
+    ) -> Self {
         BucketInterpreter {
+            global_data,
             observer,
             mem,
             scope,
@@ -227,7 +234,6 @@ impl<'a> BucketInterpreter<'a> {
                 }
             }
             AddressType::SubcmpSignal { cmp_address, .. } => {
-                println!("Load SubcmpSignal: {}", cmp_address.to_string());
                 let (addr, env) = self.execute_instruction(cmp_address, env, observe);
                 let addr =
                     addr.expect("cmp_address in SubcmpSignal must produce a value!").get_u32();
@@ -312,6 +318,10 @@ impl<'a> BucketInterpreter<'a> {
                 }
             }
             AddressType::SubcmpSignal { cmp_address, input_information, .. } => {
+                println!(
+                    "cmp_address = {:?}, input_information = {:?}",
+                    cmp_address, input_information
+                );
                 let (addr, env) = self.execute_instruction(cmp_address, env, observe);
                 let addr = addr
                     .expect(
@@ -412,9 +422,16 @@ impl<'a> BucketInterpreter<'a> {
     fn run_function_loopbody<'env>(&self, name: &String, env: Env<'env>, observe: bool) -> R<'env> {
         if cfg!(debug_assertions) {
             println!("Running function {}", name);
-        }
-        let mut res = (None, Env::new_extracted_func_env(env.clone()));
-        let interp = self.mem.build_interpreter_with_scope(self.observer, name.clone());
+        };
+        let mut res: R<'env> = (
+            None,
+            Env::new_extracted_func_env(
+                env.clone(),
+                self.global_data.borrow().extract_func_orig_loc[name][&env.get_vars_sort()].clone(),
+            ),
+        );
+        let interp =
+            self.mem.build_interpreter_with_scope(self.global_data, self.observer, name.clone());
         let observe = observe && !interp.observer.ignore_function_calls();
         let instructions = &env.get_function(name).body;
         unsafe {
@@ -424,7 +441,8 @@ impl<'a> BucketInterpreter<'a> {
                 res = interp.execute_instruction(inst, res.1, observe);
             }
         }
-        res
+        //Remove the Env::ExtractedFunction wrapper
+        (res.0, res.1.peel_extracted_func())
     }
 
     fn run_function_basic<'env>(&self, name: &String, args: Vec<Value>, observe: bool) -> Value {
@@ -435,7 +453,8 @@ impl<'a> BucketInterpreter<'a> {
         for (id, arg) in args.iter().enumerate() {
             new_env = new_env.set_var(id, arg.clone());
         }
-        let interp = self.mem.build_interpreter_with_scope(self.observer, name.clone());
+        let interp =
+            self.mem.build_interpreter_with_scope(self.global_data, self.observer, name.clone());
         let (v, _) = interp.execute_instructions(
             &self.mem.get_function(name).body,
             new_env,
@@ -457,11 +476,7 @@ impl<'a> BucketInterpreter<'a> {
             // The extracted loop body functions can change any values in the environment
             //  via the parameters passed to it. So interpret the function and keep the
             //  resulting Env (as if the function had executed inline).
-            // self.run_function_loopbody(&bucket.symbol, env, observe)
-            //
-            //TODO: TEMP: old approach
-            env = env.set_all_to_unk();
-            (Some(Unknown), env)
+            self.run_function_loopbody(&bucket.symbol, env, observe)
         } else {
             let mut args = vec![];
             for i in &bucket.arguments {
@@ -476,6 +491,9 @@ impl<'a> BucketInterpreter<'a> {
             };
             (Some(v), env)
         };
+        // println!("[execute_call_bucket] {:?}", bucket);
+        // println!(" -> value = {:?}", res.0);
+        // println!(" -> new env = {}", res.1);
 
         // Write the result in the destination according to the ReturnType
         match &bucket.return_info {
diff --git a/circuit_passes/src/bucket_interpreter/value.rs b/circuit_passes/src/bucket_interpreter/value.rs
index 6228e89de..dcceb1335 100644
--- a/circuit_passes/src/bucket_interpreter/value.rs
+++ b/circuit_passes/src/bucket_interpreter/value.rs
@@ -10,7 +10,7 @@ use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
 /// Poor man's lattice that gives up the moment values are not equal
 /// It's a join semi lattice with a top (Unknown)
 /// Not a complete lattice because there is no bottom
-#[derive(Clone, Eq, PartialEq)]
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd)]
 pub enum Value {
     Unknown,
     KnownU32(usize),
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 2c6128d7f..13bdcf7bd 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -7,24 +7,26 @@ use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
-use crate::passes::CircuitTransformationPass;
+use super::{CircuitTransformationPass, GlobalPassData};
 
-pub struct ConditionalFlattening {
+pub struct ConditionalFlatteningPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
     replacements: RefCell<BTreeMap<BranchBucket, bool>>,
 }
 
-impl ConditionalFlattening {
-    pub fn new(prime: &String) -> Self {
-        ConditionalFlattening {
+impl<'d> ConditionalFlatteningPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
+        ConditionalFlatteningPass {
+            global_data,
             memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
     }
 }
 
-impl InterpreterObserver for ConditionalFlattening {
+impl InterpreterObserver for ConditionalFlatteningPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -74,7 +76,7 @@ impl InterpreterObserver for ConditionalFlattening {
     }
 
     fn on_branch_bucket(&self, bucket: &BranchBucket, env: &Env) -> bool {
-        let interpreter = self.memory.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
         let (_, cond_result, _) = interpreter.execute_conditional_bucket(
             &bucket.cond,
             &bucket.if_branch,
@@ -105,7 +107,7 @@ impl InterpreterObserver for ConditionalFlattening {
     }
 }
 
-impl CircuitTransformationPass for ConditionalFlattening {
+impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
     fn name(&self) -> &str {
         "ConditionalFlattening"
     }
@@ -116,7 +118,7 @@ impl CircuitTransformationPass for ConditionalFlattening {
 
     fn pre_hook_template(&self, template: &TemplateCode) {
         self.memory.set_scope(template);
-        self.memory.run_template(self, template);
+        self.memory.run_template(self.global_data, self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
diff --git a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
index 489719838..94f3ab03c 100644
--- a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
+++ b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
@@ -7,17 +7,19 @@ use compiler::intermediate_representation::ir_interface::StatusInput::{Last, NoL
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
-use crate::passes::CircuitTransformationPass;
+use super::{CircuitTransformationPass, GlobalPassData};
 
-pub struct DeterministicSubCmpInvokePass {
+pub struct DeterministicSubCmpInvokePass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
     replacements: RefCell<BTreeMap<AddressType, StatusInput>>,
 }
 
-impl DeterministicSubCmpInvokePass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> DeterministicSubCmpInvokePass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         DeterministicSubCmpInvokePass {
+            global_data,
             memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
@@ -35,7 +37,7 @@ impl DeterministicSubCmpInvokePass {
         } = address_type
         {
             let env = env.clone();
-            let interpreter = self.memory.build_interpreter(self);
+            let interpreter = self.memory.build_interpreter(self.global_data, self);
             let (addr, env) = interpreter.execute_instruction(cmp_address, env, false);
             let addr = addr
                 .expect("cmp_address instruction in SubcmpSignal must produce a value!")
@@ -46,7 +48,7 @@ impl DeterministicSubCmpInvokePass {
     }
 }
 
-impl InterpreterObserver for DeterministicSubCmpInvokePass {
+impl InterpreterObserver for DeterministicSubCmpInvokePass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -123,7 +125,7 @@ impl InterpreterObserver for DeterministicSubCmpInvokePass {
     }
 }
 
-impl CircuitTransformationPass for DeterministicSubCmpInvokePass {
+impl CircuitTransformationPass for DeterministicSubCmpInvokePass<'_> {
     fn name(&self) -> &str {
         "DeterministicSubCmpInvokePass"
     }
@@ -134,7 +136,7 @@ impl CircuitTransformationPass for DeterministicSubCmpInvokePass {
 
     fn pre_hook_template(&self, template: &TemplateCode) {
         self.memory.set_scope(template);
-        self.memory.run_template(self, template);
+        self.memory.run_template(self.global_data, self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index 46640efd4..06b78b54c 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -1,20 +1,106 @@
 use std::cell::{RefCell, Ref};
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::vec;
-use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
+use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR, FR_PTR_CAST_I32_I256};
 use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
 use compiler::hir::very_concrete_program::Param;
 use compiler::intermediate_representation::{
     BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
 };
 use compiler::intermediate_representation::ir_interface::*;
+use indexmap::IndexSet;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::loop_unroll::extracted_location_updater::ExtractedFunctionLocationUpdater;
 use crate::passes::LOOP_BODY_FN_PREFIX;
+use crate::passes::loop_unroll::extracted_location_updater::ExtractedFunctionLocationUpdater;
 use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
-
 use super::new_u32_value;
 
+pub type FuncArgIdx = usize;
+pub type AddressOffset = usize;
+pub type UnrolledIterLvars = BTreeMap<usize, Value>;
+pub type ToOriginalLocation = HashMap<FuncArgIdx, (AddressType, AddressOffset)>;
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub enum ArgIndex {
+    Signal(FuncArgIdx),
+    SubCmp { signal: FuncArgIdx, arena: FuncArgIdx, counter: FuncArgIdx },
+}
+
+impl ArgIndex {
+    pub fn get_signal_idx(&self) -> FuncArgIdx {
+        match *self {
+            ArgIndex::Signal(signal) => signal,
+            ArgIndex::SubCmp { signal, .. } => signal,
+        }
+    }
+}
+
+/// Need this structure to skip id/metadata fields in ValueBucket when using as map key
+#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+struct SubcmpSignalHashFix {
+    cmp_address_parse_as: ValueType,
+    cmp_address_op_aux_no: usize,
+    cmp_address_value: usize,
+    uniform_parallel_value: Option<bool>,
+    is_output: bool,
+    input_information: InputInformation,
+    counter_override: bool,
+}
+
+impl SubcmpSignalHashFix {
+    fn convert(addr: &AddressType) -> SubcmpSignalHashFix {
+        if let AddressType::SubcmpSignal {
+            cmp_address,
+            uniform_parallel_value,
+            is_output,
+            input_information,
+            counter_override,
+        } = addr
+        {
+            if let Instruction::Value(ValueBucket { parse_as, op_aux_no, value, .. }) =
+                **cmp_address
+            {
+                return SubcmpSignalHashFix {
+                    cmp_address_parse_as: parse_as,
+                    cmp_address_op_aux_no: op_aux_no,
+                    cmp_address_value: value,
+                    uniform_parallel_value: uniform_parallel_value.clone(),
+                    is_output: is_output.clone(),
+                    input_information: input_information.clone(),
+                    counter_override: counter_override.clone(),
+                };
+            }
+        }
+        panic!("improper AddressType given")
+    }
+}
+
+struct ExtraArgsResult {
+    bucket_to_itr_to_ref: HashMap<BucketId, Vec<Option<(AddressType, AddressOffset)>>>,
+    bucket_to_args: HashMap<BucketId, ArgIndex>,
+    num_args: usize,
+}
+
+impl ExtraArgsResult {
+    fn get_passing_refs_for_itr(
+        &self,
+        iter_num: usize,
+    ) -> Vec<(&(AddressType, AddressOffset), ArgIndex)> {
+        self.bucket_to_itr_to_ref
+            .iter()
+            .map(|(k, v)| (v[iter_num].as_ref().unwrap(), self.bucket_to_args[k]))
+            .collect()
+    }
+
+    fn get_reverse_passing_refs_for_itr(&self, iter_num: usize) -> ToOriginalLocation {
+        self.bucket_to_itr_to_ref.iter().fold(ToOriginalLocation::new(), |mut acc, (k, v)| {
+            let (addr_ty, addr_offset) = v[iter_num].as_ref().unwrap();
+            acc.insert(self.bucket_to_args[k].get_signal_idx(), (addr_ty.clone(), *addr_offset));
+            acc
+        })
+    }
+}
+
 #[derive(Clone, Debug, Eq, PartialEq, Default)]
 pub struct LoopBodyExtractor {
     new_body_functions: RefCell<Vec<FunctionCode>>,
@@ -25,34 +111,55 @@ impl LoopBodyExtractor {
         self.new_body_functions.borrow()
     }
 
-    pub fn extract(
+    pub fn extract<'a>(
         &self,
         bucket: &LoopBucket,
-        recorder: &EnvRecorder,
+        recorder: &'a EnvRecorder<'a, '_>,
         unrolled: &mut InstructionList,
     ) {
         assert!(bucket.body.len() > 1);
-        let (iter_to_loc, mut bucket_arg_order) = Self::compute_extra_args(&recorder);
-        let name = self.build_new_body(bucket, &mut bucket_arg_order);
+        let extra_arg_info = Self::compute_extra_args(&recorder);
+        let name = self.build_new_body(
+            bucket,
+            extra_arg_info.bucket_to_args.clone(),
+            extra_arg_info.num_args,
+        );
         for iter_num in 0..recorder.get_iter() {
             // NOTE: CallBucket arguments must use a LoadBucket to reference the necessary pointers
             //  within the current body. However, it doesn't actually need to generate a load
             //  instruction to use these pointers as parameters to the function so we must use the
             //  `bounded_fn` field of the LoadBucket to specify the identity function to perform
             //  the "loading" (but really it just returns the pointer that was passed in).
-            let mut args = InstructionList::default();
+            let mut args = Self::new_filled_vec(
+                extra_arg_info.num_args,
+                Box::new(Instruction::Nop(NopBucket { id: 0 })),
+            );
             // Parameter for local vars
-            args.push(Self::new_storage_ptr_ref(bucket, AddressType::Variable));
+            args[0] = Self::new_storage_ptr_ref(bucket, AddressType::Variable);
             // Parameter for signals/arena
-            args.push(Self::new_storage_ptr_ref(bucket, AddressType::Signal));
-            // Additional parameters for variant vector/array access within the loop
-            if !iter_to_loc.is_empty() {
-                for a in &iter_to_loc[&iter_num] {
-                    args.push(Self::new_indexed_storage_ptr_ref(
-                        bucket,
-                        a.0.clone(),
-                        a.1.get_u32(),
-                    ));
+            args[1] = Self::new_storage_ptr_ref(bucket, AddressType::Signal);
+            // Additional parameters for subcmps and variant array indexing within the loop
+            for ((at, val), ai) in extra_arg_info.get_passing_refs_for_itr(iter_num) {
+                match ai {
+                    ArgIndex::Signal(signal) => {
+                        args[signal] = Self::new_indexed_storage_ptr_ref(bucket, at.clone(), *val)
+                    }
+                    ArgIndex::SubCmp { signal, arena, counter } => {
+                        // Pass entire subcomponent arena for calling the 'template_run' function
+                        args[arena] = Self::new_storage_ptr_ref(bucket, at.clone());
+                        // Pass specific signal referenced
+                        args[signal] = Self::new_indexed_storage_ptr_ref(bucket, at.clone(), *val);
+                        // Pass subcomponent counter reference
+                        if let AddressType::SubcmpSignal { cmp_address, .. } = &at {
+                            //TODO: may only need to add this when is_output=true but have to skip adding the Param too in that case.
+                            args[counter] = Self::new_subcmp_counter_storage_ptr_ref(
+                                bucket,
+                                cmp_address.clone(),
+                            );
+                        } else {
+                            unreachable!()
+                        }
+                    }
                 }
             }
             unrolled.push(
@@ -69,38 +176,66 @@ impl LoopBodyExtractor {
                 }
                 .allocate(),
             );
+
+            recorder.record_reverse_arg_mapping(
+                name.clone(),
+                recorder.get_vals_per_iter().get(&iter_num).unwrap().env_at_header.get_vars_sort(),
+                extra_arg_info.get_reverse_passing_refs_for_itr(iter_num),
+            );
         }
     }
 
     fn build_new_body(
         &self,
         bucket: &LoopBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        mut bucket_to_args: HashMap<BucketId, ArgIndex>,
+        num_args: usize,
     ) -> String {
-        // NOTE: must create parameter list before 'bucket_arg_order' is modified
-        let mut params = vec![
-            Param { name: String::from("lvars"), length: vec![0] },
-            Param { name: String::from("signals"), length: vec![0] },
-        ];
-        for i in 0..bucket_arg_order.len() {
-            // Use empty vector for the length to denote scalar (non-array) arguments
-            params.push(Param { name: format!("fixed_{}", i), length: vec![] });
+        // NOTE: must create parameter list before 'bucket_to_args' is modified
+        // Since the ArgIndex instances could have indices in any random order,
+        //  create the vector of required size and then set elements by index.
+        let mut params = Self::new_filled_vec(
+            num_args,
+            Param { name: String::from("EMPTY"), length: vec![usize::MAX] },
+        );
+        params[0] = Param { name: String::from("lvars"), length: vec![0] };
+        params[1] = Param { name: String::from("signals"), length: vec![0] };
+        for (i, arg_index) in bucket_to_args.values().enumerate() {
+            match arg_index {
+                ArgIndex::Signal(signal) => {
+                    //Single signal uses scalar pointer
+                    params[*signal] = Param { name: format!("fix_{}", i), length: vec![] };
+                }
+                ArgIndex::SubCmp { signal, arena, counter } => {
+                    //Subcomponent arena requires array pointer but the others are scalar
+                    params[*arena] = Param { name: format!("sub_{}", i), length: vec![0] };
+                    params[*signal] = Param { name: format!("subfix_{}", i), length: vec![] };
+                    params[*counter] = Param { name: format!("subc_{}", i), length: vec![] };
+                }
+            }
         }
 
         // Copy loop body and add a "return void" at the end
         let mut new_body = vec![];
         for s in &bucket.body {
             let mut copy: InstructionPointer = s.clone();
-            if !bucket_arg_order.is_empty() {
-                //Traverse each cloned statement before calling `update_id()` and replace the
-                //  old location reference with reference to the proper argument. Mappings are
-                //  removed as they are processed so no change is needed once the map is empty.
-                ExtractedFunctionLocationUpdater::check_instruction(&mut copy, bucket_arg_order);
-            }
+            //Traverse each cloned statement before calling `update_id()` and replace the
+            //  old location reference with reference to the proper argument. Mappings are
+            //  removed as they are processed so no change is needed once the map is empty.
+            let suffix = if !bucket_to_args.is_empty() {
+                let mut upd = ExtractedFunctionLocationUpdater::new();
+                upd.check_instruction(&mut copy, &mut bucket_to_args);
+                upd.insert_after
+            } else {
+                InstructionList::default()
+            };
             copy.update_id();
             new_body.push(copy);
+            for s in suffix {
+                new_body.push(s);
+            }
         }
-        assert!(bucket_arg_order.is_empty());
+        assert!(bucket_to_args.is_empty());
         new_body.push(
             ReturnBucket {
                 id: new_id(),
@@ -164,7 +299,7 @@ impl LoopBodyExtractor {
     fn new_indexed_storage_ptr_ref(
         bucket: &dyn ObtainMeta,
         addr_type: AddressType,
-        index: usize,
+        index: AddressOffset,
     ) -> InstructionPointer {
         CallBucket {
             id: new_id(),
@@ -183,79 +318,153 @@ impl LoopBodyExtractor {
         .allocate()
     }
 
-    fn is_all_same(data: &[usize]) -> bool {
-        data.iter()
-            .fold((true, None), {
-                |acc, elem| {
-                    if acc.1.is_some() {
-                        (acc.0 && (acc.1.unwrap() == elem), Some(elem))
-                    } else {
-                        (true, Some(elem))
-                    }
+    fn new_subcmp_counter_storage_ptr_ref(
+        bucket: &dyn ObtainMeta,
+        sub_cmp_id: InstructionPointer,
+    ) -> InstructionPointer {
+        Self::new_custom_fn_load_bucket(
+            bucket,
+            FR_PTR_CAST_I32_I256,
+            AddressType::SubcmpSignal {
+                cmp_address: sub_cmp_id,
+                uniform_parallel_value: Option::None,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+                counter_override: true,
+            },
+            new_u32_value(bucket, usize::MAX), //index is ignored for these
+        )
+    }
+
+    fn all_same<T>(data: T) -> bool
+    where
+        T: Iterator,
+        T::Item: PartialEq,
+    {
+        data.fold((true, None), {
+            |acc, elem| {
+                if acc.1.is_some() {
+                    (acc.0 && (acc.1.unwrap() == elem), Some(elem))
+                } else {
+                    (true, Some(elem))
                 }
-            })
-            .0
+            }
+        })
+        .0
     }
 
     // Key for the returned map is iteration number.
-    // The BTreeMap that is returned maps bucket to fixed* argument index.
-    fn compute_extra_args(
-        recorder: &EnvRecorder,
-    ) -> (HashMap<usize, Vec<(AddressType, Value)>>, BTreeMap<BucketId, usize>) {
-        let mut iter_to_loc: HashMap<usize, Vec<(AddressType, Value)>> = HashMap::default();
-        let mut bucket_arg_order = BTreeMap::new();
-        let vpi = recorder.vals_per_iteration.borrow();
-        let all_loadstore_bucket_ids: HashSet<&BucketId> =
+    // The HashMap that is returned maps bucket to fixed* argument index.
+    fn compute_extra_args<'a>(recorder: &'a EnvRecorder<'a, '_>) -> ExtraArgsResult {
+        // Table structure indexed first by load/store BucketId, then by iteration number.
+        //  View the first (BucketId) as columns and the second (iteration number) as rows.
+        //  The data reference is wrapped in Option to allow for some iterations that don't
+        //  execute a specific bucket due to conditional branches within the loop body.
+        //  When comparing values across iterations, ignore those cases where there is no
+        //  value for a certain iteration and only check among those iterations that have a
+        //  value because it doesn't matter what parameter is passed in for those iterations
+        //  that do not execute that specific bucket. This is the reason it was important to
+        //  store Unknown values in the `loadstore_to_index` index as well, so they are not
+        //  confused with values that simply don't exist.
+        let mut bucket_to_itr_to_ref: HashMap<BucketId, Vec<Option<(AddressType, AddressOffset)>>> =
+            HashMap::new();
+        // Maps each load/store BucketId to the extra function argument index(es) assigned to it.
+        let mut bucket_to_args: HashMap<BucketId, ArgIndex> = HashMap::new();
+        let vpi = recorder.get_vals_per_iter();
+        // NOTE: starts at 2 because the current component's signal arena and lvars are first.
+        let mut next_idx: FuncArgIdx = 2;
+        // First step is to collect all location references into the 'bucket_to_itr_to_ref' table.
+        // NOTE: collect to IndexSet to preserve insertion order to stabilize test output.
+        let all_loadstore_bucket_ids: IndexSet<&BucketId> =
             vpi.values().flat_map(|x| x.loadstore_to_index.keys()).collect();
-        // println!("all_loadstore_bucket_ids = {:?}", all_loadstore_bucket_ids);
         for id in all_loadstore_bucket_ids {
-            // Check if the computed index value is the same across all iterations for this BucketId.
-            //  If it is not the same in all iterations, then it needs to be passed as a separate
-            //  parameter to the new function.
-            // NOTE: Some iterations of the loop may have no mapping for certain BucketIds because
-            //  conditional branches can make certain buckets unused in some iterations. Just ignore
-            //  those cases where there is no value for a certain iteration and check among those
-            //  iterations that have a value. This is the reason it was important to store Unknown
-            //  values in the `loadstore_to_index` index as well, so they are not confused with
-            //  missing values.
-            let mut next_iter_to_store = 0;
-            let mut prev_val = None;
-            for curr_iter in 0..recorder.get_iter() {
-                let curr_val = vpi[&curr_iter].loadstore_to_index.get(id);
-                if curr_val.is_some() {
-                    if prev_val.is_none() {
-                        //initial state
-                        prev_val = curr_val;
-                    } else {
-                        assert!(prev_val.is_some() && curr_val.is_some());
-                        let prev_val_pair = prev_val.unwrap();
-                        let curr_val_pair = curr_val.unwrap();
-                        assert_eq!(prev_val_pair.0, curr_val_pair.0); //AddressType always matches
-                        if !Value::eq(&prev_val_pair.1, &curr_val_pair.1) {
-                            assert!(!prev_val_pair.1.is_unknown() && !curr_val_pair.1.is_unknown());
-                            // Store current Value for current iteration
-                            iter_to_loc.entry(curr_iter).or_default().push(curr_val_pair.clone());
-                            // Store previous Value for all iterations that did have the same
-                            //  value (or None) and have not yet been stored.
-                            for j in next_iter_to_store..curr_iter {
-                                iter_to_loc.entry(j).or_default().push(prev_val_pair.clone());
-                            }
-                            // Update for next iteration
-                            next_iter_to_store = curr_iter + 1;
-                            prev_val = curr_val;
+            let column = bucket_to_itr_to_ref.entry(*id).or_default();
+            for iter_num in 0..recorder.get_iter() {
+                let temp = vpi[&iter_num].loadstore_to_index.get(id);
+                // ASSERT: index values are known in every (available) iteration
+                assert!(temp.is_none() || !temp.unwrap().1.is_unknown());
+                column.push(temp.map(|(a, v)| (a.clone(), v.get_u32())));
+            }
+            // ASSERT: same AddressType kind for this bucket in every (available) iteration
+            assert!(Self::all_same(
+                column.iter().filter_map(|x| x.as_ref()).map(|x| std::mem::discriminant(&x.0))
+            ));
+
+            // Check if the computed index value for this bucket is the same across all iterations (where it is
+            //  not None, see earlier comment). If it is not, then an extra function argument is needed for it.
+            //  Actually, check not only the computed index Value but the AddressType as well to capture when
+            //  it's a SubcmpSignal referencing a different subcomponent (the AddressType::cmp_address field
+            //  was also interpreted within the EnvRecorder so this comparison will be accurate).
+            if !Self::all_same(column.iter().filter_map(|x| x.as_ref())) {
+                bucket_to_args.insert(*id, ArgIndex::Signal(next_idx));
+                next_idx += 1;
+            }
+        }
+        //ASSERT: All columns have the same length (i.e. the number of iterations)
+        assert!(bucket_to_itr_to_ref.values().all(|x| x.len() == recorder.get_iter()));
+
+        // Also, if it's a subcomponent reference, then extra arguments are needed for its
+        //  signal arena and counter (because subcomponents are not included by default like
+        //  the current component's signal arena and lvars are).
+        // Find groups of BucketId that use the same SubcmpSignal (to reduce number of arguments).
+        //  A group must have this same property in all iterations in order to be safe to combine.
+        let mut safe_groups: BTreeMap<SubcmpSignalHashFix, HashSet<BucketId>> = Default::default();
+        for iter_num in 0..recorder.get_iter() {
+            let grps: BTreeMap<SubcmpSignalHashFix, HashSet<BucketId>> = bucket_to_itr_to_ref
+                .iter()
+                .map(|(k, col)| (k, &col[iter_num]))
+                .fold(BTreeMap::new(), |mut r, (b, a)| {
+                    if let Some((at, _)) = a {
+                        if let AddressType::SubcmpSignal { .. } = at {
+                            r.entry(SubcmpSignalHashFix::convert(&at)).or_default().insert(*b);
                         }
                     }
-                }
+                    r
+                });
+            if iter_num == 0 {
+                safe_groups = grps;
+            } else {
+                safe_groups.retain(|_, v| grps.values().any(|x| x == v));
+            }
+            if safe_groups.is_empty() {
+                break;
             }
-            //ASSERT: All vectors have the same length at the end of each iteration
-            assert!(Self::is_all_same(&iter_to_loc.values().map(|x| x.len()).collect::<Vec<_>>()));
-            //ASSERT: Value was added for every iteration or for no iterations
-            assert!(next_iter_to_store == 0 || next_iter_to_store == recorder.get_iter());
-            //
-            if next_iter_to_store != 0 {
-                bucket_arg_order.insert(id.clone(), bucket_arg_order.len());
+        }
+        for (_, buckets) in safe_groups.iter() {
+            let arena_idx: FuncArgIdx = next_idx;
+            let counter_idx: FuncArgIdx = next_idx + 1;
+            next_idx += 2;
+            for b in buckets {
+                if let Some(ArgIndex::Signal(sig)) = bucket_to_args.get(b) {
+                    bucket_to_args.insert(
+                        *b,
+                        ArgIndex::SubCmp { signal: *sig, arena: arena_idx, counter: counter_idx },
+                    );
+                } else {
+                    //TODO: What to do when the signal index w/in the subcomp was not variant?
+                    //  Should I just add a parameter anyway? It doesn't hurt to do that so
+                    //  I guess that's the approach to take for now.
+                    bucket_to_args.insert(
+                        *b,
+                        ArgIndex::SubCmp {
+                            signal: next_idx,
+                            arena: arena_idx,
+                            counter: counter_idx,
+                        },
+                    );
+                    next_idx += 1;
+                }
             }
         }
-        (iter_to_loc, bucket_arg_order)
+
+        //Keep only the table columns where extra parameters are necessary
+        bucket_to_itr_to_ref.retain(|k, _| bucket_to_args.contains_key(k));
+        ExtraArgsResult { bucket_to_itr_to_ref, bucket_to_args, num_args: next_idx }
+    }
+
+    fn new_filled_vec<T: Clone>(new_len: usize, value: T) -> Vec<T> {
+        let mut result = Vec::with_capacity(new_len);
+        result.resize(new_len, value);
+        result
     }
 }
diff --git a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
index 563d996c7..d0e599021 100644
--- a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
+++ b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
@@ -1,23 +1,32 @@
-use std::collections::BTreeMap;
-use compiler::intermediate_representation::{BucketId, InstructionPointer};
+use std::collections::HashMap;
+use code_producers::llvm_elements::fr::FR_IDENTITY_ARR_PTR;
+use compiler::intermediate_representation::{BucketId, InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
+use super::body_extractor::ArgIndex;
 use super::new_u32_value;
 
-pub struct ExtractedFunctionLocationUpdater {}
+pub struct ExtractedFunctionLocationUpdater {
+    pub insert_after: InstructionList,
+}
 
 impl ExtractedFunctionLocationUpdater {
+    pub fn new() -> ExtractedFunctionLocationUpdater {
+        ExtractedFunctionLocationUpdater { insert_after: Default::default() }
+    }
+
     fn check_load_bucket(
+        &mut self,
         bucket: &mut LoadBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        if let Some(x) = bucket_arg_order.remove(&bucket.id) {
-            // Update the destination information to reference the argument
+        if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
+            // Update the location information to reference the argument
             //NOTE: This can't use AddressType::Variable or AddressType::Signal
             //  because ExtractedFunctionLLVMIRProducer references the first two
             //  parameters with those. So this has to use SubcmpSignal (it should
             //  work fine because subcomps will also just be additional params).
             bucket.address_type = AddressType::SubcmpSignal {
-                cmp_address: new_u32_value(bucket, x),
+                cmp_address: new_u32_value(bucket, ai.get_signal_idx()),
                 uniform_parallel_value: None,
                 counter_override: false,
                 is_output: false,
@@ -29,22 +38,69 @@ impl ExtractedFunctionLocationUpdater {
             };
         } else {
             // If not replacing, check deeper in the AddressType and LocationRule
-            Self::check_address_type(&mut bucket.address_type, bucket_arg_order);
-            Self::check_location_rule(&mut bucket.src, bucket_arg_order);
+            self.check_address_type(&mut bucket.address_type, bucket_arg_order);
+            self.check_location_rule(&mut bucket.src, bucket_arg_order);
         }
     }
 
     fn check_store_bucket(
+        &mut self,
         bucket: &mut StoreBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
         // Check the source/RHS of the store in either case
-        Self::check_instruction(&mut bucket.src, bucket_arg_order);
+        self.check_instruction(&mut bucket.src, bucket_arg_order);
         //
-        if let Some(x) = bucket_arg_order.remove(&bucket.id) {
-            // Update the destination information to reference the argument
+        if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
+            // If needed, add a StoreBucket to 'insert_after' that will call the template_run function.
+            // NOTE: This must happen before the modification step so it can use existing values from the bucket.
+            if let ArgIndex::SubCmp { arena, .. } = ai {
+                self.insert_after.push(
+                    StoreBucket {
+                        id: new_id(),
+                        source_file_id: bucket.source_file_id.clone(),
+                        line: bucket.line,
+                        message_id: bucket.message_id,
+                        context: bucket.context.clone(),
+                        dest_is_output: bucket.dest_is_output,
+                        dest_address_type: AddressType::SubcmpSignal {
+                            cmp_address: new_u32_value(bucket, arena),
+                            uniform_parallel_value: None,
+                            counter_override: false,
+                            is_output: false,
+                            //TODO: Not sure what to put here. If I put Unknown (assuming the later pass
+                            //  would correct) it crashes somewhere. What I really need is Last in the
+                            //  proper place to make it generate the *_run function at the right time
+                            //  but NoLast in locations prior to that (I think). Why isn't Unknown handled
+                            //  by the later deterministic subcomp pass or something? Always using
+                            //  Last here could result in the run function being called too soon.
+                            //SEE: circom/tests/subcmps/subcmps0C.circom
+                            input_information: InputInformation::Input {
+                                status: StatusInput::Last,
+                            },
+                        },
+                        dest: LocationRule::Indexed {
+                            location: new_u32_value(bucket, 0), //the value here is ignored by the 'bounded_fn' below
+                            template_header: match &bucket.dest {
+                                LocationRule::Indexed { template_header, .. } => {
+                                    template_header.clone()
+                                }
+                                LocationRule::Mapped { .. } => todo!(),
+                            },
+                        },
+                        src: new_u32_value(bucket, 0), //the value here is ignored at runtime
+                        bounded_fn: Some(String::from(FR_IDENTITY_ARR_PTR)), //NOTE: doesn't have enough arguments but it works out
+                    }
+                    .allocate(),
+                );
+                // NOTE: Not adding counter for now because it shouldn't be needed anyway and it's more work to add.
+                //  The best approach would probably be to generate Load+Compute+Store (based on what StoreBucket
+                //  would normally generate for it) in an "insert_before" list just like the "insert_after" list.
+            }
+
+            //Transform this bucket into the normal fixed-index signal reference
             bucket.dest_address_type = AddressType::SubcmpSignal {
-                cmp_address: new_u32_value(bucket, x),
+                cmp_address: new_u32_value(bucket, ai.get_signal_idx()),
                 uniform_parallel_value: None,
                 counter_override: false,
                 is_output: false,
@@ -56,67 +112,73 @@ impl ExtractedFunctionLocationUpdater {
             };
         } else {
             // If not replacing, check deeper in the AddressType and LocationRule
-            Self::check_address_type(&mut bucket.dest_address_type, bucket_arg_order);
-            Self::check_location_rule(&mut bucket.dest, bucket_arg_order);
+            self.check_address_type(&mut bucket.dest_address_type, bucket_arg_order);
+            self.check_location_rule(&mut bucket.dest, bucket_arg_order);
         }
     }
 
     fn check_location_rule(
+        &mut self,
         location_rule: &mut LocationRule,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
         match location_rule {
             LocationRule::Indexed { location, .. } => {
-                Self::check_instruction(location, bucket_arg_order);
+                self.check_instruction(location, bucket_arg_order);
             }
             LocationRule::Mapped { .. } => unreachable!(),
         }
     }
 
     fn check_address_type(
+        &mut self,
         addr_type: &mut AddressType,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
         if let AddressType::SubcmpSignal { cmp_address, .. } = addr_type {
-            Self::check_instruction(cmp_address, bucket_arg_order);
+            self.check_instruction(cmp_address, bucket_arg_order);
         }
     }
 
     fn check_compute_bucket(
+        &mut self,
         bucket: &mut ComputeBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        for i in &mut bucket.stack {
-            Self::check_instruction(i, bucket_arg_order);
-        }
+        self.check_instructions(&mut bucket.stack, bucket_arg_order);
     }
 
     fn check_assert_bucket(
+        &mut self,
         bucket: &mut AssertBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        Self::check_instruction(&mut bucket.evaluate, bucket_arg_order);
+        self.check_instruction(&mut bucket.evaluate, bucket_arg_order);
     }
 
     fn check_loop_bucket(
+        &mut self,
         bucket: &mut LoopBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        todo!()
+        self.check_instruction(&mut bucket.continue_condition, bucket_arg_order);
+        self.check_instructions(&mut bucket.body, bucket_arg_order);
     }
 
     fn check_create_cmp_bucket(
+        &mut self,
         bucket: &mut CreateCmpBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        todo!()
+        self.check_instruction(&mut bucket.sub_cmp_id, bucket_arg_order);
     }
 
     fn check_constraint_bucket(
+        &mut self,
         bucket: &mut ConstraintBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        Self::check_instruction(
+        self.check_instruction(
             match bucket {
                 ConstraintBucket::Substitution(i) => i,
                 ConstraintBucket::Equality(i) => i,
@@ -126,66 +188,85 @@ impl ExtractedFunctionLocationUpdater {
     }
 
     fn check_block_bucket(
+        &mut self,
         bucket: &mut BlockBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        todo!()
+        self.check_instructions(&mut bucket.body, bucket_arg_order);
     }
 
     fn check_call_bucket(
+        &mut self,
         bucket: &mut CallBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        todo!()
+        self.check_instructions(&mut bucket.arguments, bucket_arg_order);
     }
 
     fn check_branch_bucket(
+        &mut self,
         bucket: &mut BranchBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        todo!()
+        self.check_instruction(&mut bucket.cond, bucket_arg_order);
+        self.check_instructions(&mut bucket.if_branch, bucket_arg_order);
+        self.check_instructions(&mut bucket.else_branch, bucket_arg_order);
     }
 
     fn check_return_bucket(
+        &mut self,
         bucket: &mut ReturnBucket,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
-        Self::check_instruction(&mut bucket.value, bucket_arg_order);
+        self.check_instruction(&mut bucket.value, bucket_arg_order);
     }
 
-    fn check_log_bucket(bucket: &mut LogBucket, bucket_arg_order: &mut BTreeMap<BucketId, usize>) {
+    fn check_log_bucket(
+        &mut self,
+        bucket: &mut LogBucket,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+    ) {
         for arg in &mut bucket.argsprint {
             if let LogBucketArg::LogExp(i) = arg {
-                Self::check_instruction(i, bucket_arg_order);
+                self.check_instruction(i, bucket_arg_order);
             }
         }
     }
 
     //Nothing to do
-    fn check_value_bucket(_: &mut ValueBucket, _: &mut BTreeMap<BucketId, usize>) {}
-    fn check_nop_bucket(_: &mut NopBucket, _: &mut BTreeMap<BucketId, usize>) {}
+    fn check_value_bucket(&mut self, _: &mut ValueBucket, _: &mut HashMap<BucketId, ArgIndex>) {}
+    fn check_nop_bucket(&mut self, _: &mut NopBucket, _: &mut HashMap<BucketId, ArgIndex>) {}
+
+    fn check_instructions(
+        &mut self,
+        insts: &mut Vec<InstructionPointer>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+    ) {
+        for i in insts {
+            self.check_instruction(i, bucket_arg_order);
+        }
+    }
 
     pub fn check_instruction(
+        &mut self,
         inst: &mut InstructionPointer,
-        bucket_arg_order: &mut BTreeMap<BucketId, usize>,
+        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
     ) {
         match inst.as_mut() {
-            Instruction::Value(ref mut b) => Self::check_value_bucket(b, bucket_arg_order),
-            Instruction::Load(ref mut b) => Self::check_load_bucket(b, bucket_arg_order),
-            Instruction::Store(ref mut b) => Self::check_store_bucket(b, bucket_arg_order),
-            Instruction::Compute(ref mut b) => Self::check_compute_bucket(b, bucket_arg_order),
-            Instruction::Call(ref mut b) => Self::check_call_bucket(b, bucket_arg_order),
-            Instruction::Branch(ref mut b) => Self::check_branch_bucket(b, bucket_arg_order),
-            Instruction::Return(ref mut b) => Self::check_return_bucket(b, bucket_arg_order),
-            Instruction::Assert(ref mut b) => Self::check_assert_bucket(b, bucket_arg_order),
-            Instruction::Log(ref mut b) => Self::check_log_bucket(b, bucket_arg_order),
-            Instruction::Loop(ref mut b) => Self::check_loop_bucket(b, bucket_arg_order),
-            Instruction::CreateCmp(ref mut b) => Self::check_create_cmp_bucket(b, bucket_arg_order),
-            Instruction::Constraint(ref mut b) => {
-                Self::check_constraint_bucket(b, bucket_arg_order)
-            }
-            Instruction::Block(ref mut b) => Self::check_block_bucket(b, bucket_arg_order),
-            Instruction::Nop(ref mut b) => Self::check_nop_bucket(b, bucket_arg_order),
+            Instruction::Value(ref mut b) => self.check_value_bucket(b, bucket_arg_order),
+            Instruction::Load(ref mut b) => self.check_load_bucket(b, bucket_arg_order),
+            Instruction::Store(ref mut b) => self.check_store_bucket(b, bucket_arg_order),
+            Instruction::Compute(ref mut b) => self.check_compute_bucket(b, bucket_arg_order),
+            Instruction::Call(ref mut b) => self.check_call_bucket(b, bucket_arg_order),
+            Instruction::Branch(ref mut b) => self.check_branch_bucket(b, bucket_arg_order),
+            Instruction::Return(ref mut b) => self.check_return_bucket(b, bucket_arg_order),
+            Instruction::Assert(ref mut b) => self.check_assert_bucket(b, bucket_arg_order),
+            Instruction::Log(ref mut b) => self.check_log_bucket(b, bucket_arg_order),
+            Instruction::Loop(ref mut b) => self.check_loop_bucket(b, bucket_arg_order),
+            Instruction::CreateCmp(ref mut b) => self.check_create_cmp_bucket(b, bucket_arg_order),
+            Instruction::Constraint(ref mut b) => self.check_constraint_bucket(b, bucket_arg_order),
+            Instruction::Block(ref mut b) => self.check_block_bucket(b, bucket_arg_order),
+            Instruction::Nop(ref mut b) => self.check_nop_bucket(b, bucket_arg_order),
         }
     }
 }
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 6895f434d..1dad55b28 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -1,17 +1,22 @@
-use std::cell::RefCell;
+use std::cell::{RefCell, Ref};
 use std::collections::HashMap;
 use std::fmt::{Debug, Formatter};
+use indexmap::IndexMap;
 use compiler::intermediate_representation::BucketId;
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::value::Value;
+use crate::passes::GlobalPassData;
+use super::body_extractor::{UnrolledIterLvars, ToOriginalLocation};
 
 /// Holds values of index variables at array loads/stores within a loop
 pub struct VariableValues<'a> {
     pub env_at_header: Env<'a>,
-    pub loadstore_to_index: HashMap<BucketId, (AddressType, Value)>, // key is load/store bucket ID
+    /// The key is the ID of the load/store bucket where the reference is located.
+    /// NOTE: uses IndexMap to preserve insertion order to stabilize test output.
+    pub loadstore_to_index: IndexMap<BucketId, (AddressType, Value)>,
 }
 
 impl<'a> VariableValues<'a> {
@@ -31,16 +36,17 @@ impl Debug for VariableValues<'_> {
     }
 }
 
-pub struct EnvRecorder<'a> {
+pub struct EnvRecorder<'a, 'd> {
+    global_data: &'d RefCell<GlobalPassData>,
     mem: &'a PassMemory,
     // NOTE: RefCell is needed here because the instance of this struct is borrowed by
     //  the main interpreter while we also need to mutate these internal structures.
     current_iter_num: RefCell<usize>,
     safe_to_move: RefCell<bool>,
-    pub vals_per_iteration: RefCell<HashMap<usize, VariableValues<'a>>>, // key is iteration number
+    vals_per_iteration: RefCell<HashMap<usize, VariableValues<'a>>>, // key is iteration number
 }
 
-impl Debug for EnvRecorder<'_> {
+impl Debug for EnvRecorder<'_, '_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         write!(
             f,
@@ -52,9 +58,10 @@ impl Debug for EnvRecorder<'_> {
     }
 }
 
-impl<'a> EnvRecorder<'a> {
-    pub fn new(mem: &'a PassMemory) -> Self {
+impl<'a, 'd> EnvRecorder<'a, 'd> {
+    pub fn new(global_data: &'d RefCell<GlobalPassData>, mem: &'a PassMemory) -> Self {
         EnvRecorder {
+            global_data,
             mem,
             vals_per_iteration: Default::default(),
             current_iter_num: RefCell::new(0),
@@ -62,6 +69,10 @@ impl<'a> EnvRecorder<'a> {
         }
     }
 
+    pub fn get_vals_per_iter(&self) -> Ref<HashMap<usize, VariableValues<'a>>> {
+        self.vals_per_iteration.borrow()
+    }
+
     pub fn is_safe_to_move(&self) -> bool {
         *self.safe_to_move.borrow()
     }
@@ -86,6 +97,20 @@ impl<'a> EnvRecorder<'a> {
         self.vals_per_iteration.borrow().get(&iter).unwrap().env_at_header.clone()
     }
 
+    pub fn record_reverse_arg_mapping(
+        &self,
+        extract_func: String,
+        iter_env: UnrolledIterLvars,
+        value: ToOriginalLocation,
+    ) {
+        self.global_data
+            .borrow_mut()
+            .extract_func_orig_loc
+            .entry(extract_func)
+            .or_default()
+            .insert(iter_env, value);
+    }
+
     fn record_memloc_at_bucket(&self, bucket_id: &BucketId, addr_ty: AddressType, val: Value) {
         let iter = self.get_iter();
         assert!(self.vals_per_iteration.borrow().contains_key(&iter));
@@ -97,52 +122,77 @@ impl<'a> EnvRecorder<'a> {
             .insert(*bucket_id, (addr_ty, val));
     }
 
-    fn compute_index(&self, loc: &LocationRule, env: &Env) -> Value {
-        match loc {
-            LocationRule::Mapped { .. } => {
-                todo!(); //not sure if/how to handle that
-            }
-            LocationRule::Indexed { location, .. } => {
-                // Evaluate the index using the current environment and using the environment from the
-                //  loop header. If either is Unknown or they do not give the same value, then it is
-                //  not safe to move the loop body to another function because the index computation may
-                //  not give the same result when done at the call site, outside of the new function.
-                let interp = self.mem.build_interpreter(self);
-                let (idx_loc, _) = interp.execute_instruction(location, env.clone(), false);
-                // println!("--   LOC: var/sig[{:?}]", idx_loc); //TODO: TEMP
-                if let Some(idx_loc) = idx_loc {
-                    let (idx_header, _) =
-                        interp.execute_instruction(location, self.get_header_env_clone(), false);
-                    if let Some(idx_header) = idx_header {
-                        if Value::eq(&idx_header, &idx_loc) {
-                            return idx_loc;
-                        }
-                    }
+    fn compute_index_from_inst(&self, env: &Env, location: &InstructionPointer) -> Value {
+        // Evaluate the index using the current environment and using the environment from the
+        //  loop header. If either is Unknown or they do not give the same value, then it is
+        //  not safe to move the loop body to another function because the index computation may
+        //  not give the same result when done at the call site, outside of the new function.
+        let interp = self.mem.build_interpreter(self.global_data, self);
+        let (idx_loc, _) = interp.execute_instruction(location, env.clone(), false);
+        // println!("--   LOC: var/sig[{:?}]", idx_loc); //TODO: TEMP
+        if let Some(idx_loc) = idx_loc {
+            let (idx_header, _) =
+                interp.execute_instruction(location, self.get_header_env_clone(), false);
+            if let Some(idx_header) = idx_header {
+                if Value::eq(&idx_header, &idx_loc) {
+                    return idx_loc;
                 }
-                Value::Unknown
             }
         }
+        Value::Unknown
     }
 
-    fn check(&self, bucket_id: &BucketId, addr_ty: &AddressType, loc: &LocationRule, env: &Env) {
-        let val_result = self.compute_index(loc, env);
-        if val_result == Value::Unknown {
-            println!("NOT safe to move {}: {:?}[{:?}]", bucket_id, addr_ty, loc); //TODO: TEMP
+    fn compute_index_from_rule(&self, env: &Env, loc: &LocationRule) -> Value {
+        match loc {
+            LocationRule::Mapped { .. } => todo!(), //not sure if/how to handle that
+            LocationRule::Indexed { location, .. } => self.compute_index_from_inst(env, location),
+        }
+    }
+
+    fn visit(&self, bucket_id: &BucketId, addr_ty: &AddressType, loc: &LocationRule, env: &Env) {
+        let loc_result = self.compute_index_from_rule(env, loc);
+        if loc_result == Value::Unknown {
             self.safe_to_move.replace(false);
         }
-        //NOTE: must record even when Unknown to ensure that Unknown
-        //  value is not confused with missing values for an iteration
-        //  that can be caused by conditionals within the loop.
-        self.record_memloc_at_bucket(bucket_id, addr_ty.clone(), val_result);
+        //NOTE: must record even when Unknown to ensure that Unknown value is not confused with
+        //  missing values for an iteration that can be caused by conditionals within the loop.
+        if let AddressType::SubcmpSignal {
+            cmp_address,
+            uniform_parallel_value,
+            is_output,
+            input_information,
+            counter_override,
+        } = addr_ty
+        {
+            let addr_result = self.compute_index_from_inst(env, cmp_address);
+            self.record_memloc_at_bucket(
+                bucket_id,
+                AddressType::SubcmpSignal {
+                    cmp_address: {
+                        if addr_result == Value::Unknown {
+                            self.safe_to_move.replace(false);
+                        }
+                        addr_result.to_value_bucket(self.mem).allocate()
+                    },
+                    uniform_parallel_value: uniform_parallel_value.clone(),
+                    is_output: *is_output,
+                    input_information: input_information.clone(),
+                    counter_override: *counter_override,
+                },
+                loc_result,
+            );
+        } else {
+            self.record_memloc_at_bucket(bucket_id, addr_ty.clone(), loc_result);
+        }
     }
 }
 
-impl InterpreterObserver for EnvRecorder<'_> {
+impl InterpreterObserver for EnvRecorder<'_, '_> {
     fn on_load_bucket(&self, bucket: &LoadBucket, env: &Env) -> bool {
         if let Some(_) = bucket.bounded_fn {
             todo!(); //not sure if/how to handle that
         }
-        self.check(&bucket.id, &bucket.address_type, &bucket.src, env);
+        self.visit(&bucket.id, &bucket.address_type, &bucket.src, env);
         true
     }
 
@@ -150,7 +200,7 @@ impl InterpreterObserver for EnvRecorder<'_> {
         if let Some(_) = bucket.bounded_fn {
             todo!(); //not sure if/how to handle that
         }
-        self.check(&bucket.id, &bucket.dest_address_type, &bucket.dest, env);
+        self.visit(&bucket.id, &bucket.dest_address_type, &bucket.dest, env);
         true
     }
 
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index b3c136dc4..cff56cc1a 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -14,9 +14,8 @@ use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
-use crate::passes::CircuitTransformationPass;
 use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
-
+use super::{CircuitTransformationPass, GlobalPassData};
 use self::body_extractor::LoopBodyExtractor;
 
 const EXTRACT_LOOP_BODY_TO_NEW_FUNC: bool = true;
@@ -34,16 +33,18 @@ pub fn new_u32_value(bucket: &dyn ObtainMeta, val: usize) -> InstructionPointer
     .allocate()
 }
 
-pub struct LoopUnrollPass {
+pub struct LoopUnrollPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
     memory: PassMemory,
     extractor: LoopBodyExtractor,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     replacements: RefCell<BTreeMap<BucketId, InstructionPointer>>,
 }
 
-impl LoopUnrollPass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> LoopUnrollPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         LoopUnrollPass {
+            global_data,
             memory: PassMemory::new(prime, String::from(""), Default::default()),
             replacements: Default::default(),
             extractor: Default::default(),
@@ -62,11 +63,11 @@ impl LoopUnrollPass {
             println!("LOOP ENTRY env {}", env); //TODO: TEMP
         }
         // Compute loop iteration count. If unknown, return immediately.
-        let recorder = EnvRecorder::new(&self.memory);
+        let recorder = EnvRecorder::new(self.global_data, &self.memory);
         {
             //TODO: This has the wrong scope if an inner function w/ fixed params will be processed! Need test case for it.
             //  Can't make it crash. Maybe it's not activating in current setup, it was only when I tried to process the other functions?
-            let interpreter = self.memory.build_interpreter(&recorder);
+            let interpreter = self.memory.build_interpreter(self.global_data, &recorder);
             let mut inner_env = env.clone();
             loop {
                 recorder.record_env_at_header(inner_env.clone());
@@ -119,13 +120,13 @@ impl LoopUnrollPass {
     // checking if new loop buckets appear
     fn continue_inside(&self, bucket: &BlockBucket, env: &Env) {
         println!("\ncontinue_inside {:?} with {} ", bucket, env);
-        let interpreter = self.memory.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
         let env = Env::new_unroll_block_env(env.clone(), &self.extractor);
         interpreter.execute_block_bucket(bucket, env, true);
     }
 }
 
-impl InterpreterObserver for LoopUnrollPass {
+impl InterpreterObserver for LoopUnrollPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -207,7 +208,7 @@ impl InterpreterObserver for LoopUnrollPass {
     }
 }
 
-impl CircuitTransformationPass for LoopUnrollPass {
+impl CircuitTransformationPass for LoopUnrollPass<'_> {
     fn name(&self) -> &str {
         "LoopUnrollPass"
     }
@@ -234,7 +235,7 @@ impl CircuitTransformationPass for LoopUnrollPass {
 
     fn pre_hook_template(&self, template: &TemplateCode) {
         self.memory.set_scope(template);
-        self.memory.run_template(self, template);
+        self.memory.run_template(self.global_data, self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
@@ -259,6 +260,7 @@ impl CircuitTransformationPass for LoopUnrollPass {
 
 #[cfg(test)]
 mod test {
+    use std::cell::RefCell;
     use std::collections::HashMap;
     use compiler::circuit_design::template::TemplateCodeInfo;
     use compiler::compiler_interface::Circuit;
@@ -267,13 +269,14 @@ mod test {
         AddressType, Allocate, ComputeBucket, InstrContext, LoadBucket, LocationRule, LoopBucket,
         OperatorType, StoreBucket, ValueBucket, ValueType,
     };
-    use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX};
+    use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX, GlobalPassData};
     use crate::passes::loop_unroll::LoopUnrollPass;
 
     #[test]
     fn test_loop_unrolling() {
         let prime = "goldilocks".to_string();
-        let pass = LoopUnrollPass::new(&prime);
+        let global_data = RefCell::new(GlobalPassData::new());
+        let pass = LoopUnrollPass::new(prime, &global_data);
         let mut circuit = example_program();
         circuit.llvm_data.variable_index_mapping.insert("test_0".to_string(), HashMap::new());
         circuit.llvm_data.signal_index_mapping.insert("test_0".to_string(), HashMap::new());
diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index 31ee34405..513fc9a1a 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -9,17 +9,19 @@ use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::Value::KnownU32;
-use crate::passes::CircuitTransformationPass;
+use super::{CircuitTransformationPass, GlobalPassData};
 
-pub struct MappedToIndexedPass {
+pub struct MappedToIndexedPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
     replacements: RefCell<BTreeMap<LocationRule, LocationRule>>,
 }
 
-impl MappedToIndexedPass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> MappedToIndexedPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         MappedToIndexedPass {
+            global_data,
             memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
@@ -32,7 +34,7 @@ impl MappedToIndexedPass {
         signal_code: usize,
         env: &Env,
     ) -> LocationRule {
-        let interpreter = self.memory.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
 
         let (resolved_addr, acc_env) =
             interpreter.execute_instruction(cmp_address, env.clone(), false);
@@ -94,7 +96,7 @@ impl MappedToIndexedPass {
     }
 }
 
-impl InterpreterObserver for MappedToIndexedPass {
+impl InterpreterObserver for MappedToIndexedPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -164,7 +166,7 @@ impl InterpreterObserver for MappedToIndexedPass {
     }
 }
 
-impl CircuitTransformationPass for MappedToIndexedPass {
+impl CircuitTransformationPass for MappedToIndexedPass<'_> {
     fn name(&self) -> &str {
         "MappedToIndexedPass"
     }
@@ -199,6 +201,6 @@ impl CircuitTransformationPass for MappedToIndexedPass {
 
     fn pre_hook_template(&self, template: &TemplateCode) {
         self.memory.set_scope(template);
-        self.memory.run_template(self, template);
+        self.memory.run_template(self.global_data, self, template);
     }
 }
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index a00aebf02..7240e3518 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -1,4 +1,5 @@
 use std::cell::RefCell;
+use std::collections::{HashMap, BTreeMap};
 use compiler::circuit_design::function::{FunctionCode, FunctionCodeInfo};
 use compiler::circuit_design::template::{TemplateCode, TemplateCodeInfo};
 use compiler::compiler_interface::Circuit;
@@ -6,12 +7,13 @@ use compiler::intermediate_representation::{Instruction, InstructionList, Instru
 use compiler::intermediate_representation::ir_interface::*;
 use code_producers::llvm_elements::stdlib::GENERATED_FN_PREFIX;
 use crate::passes::{
-    conditional_flattening::ConditionalFlattening,
+    checks::assert_unique_ids_in_circuit, conditional_flattening::ConditionalFlatteningPass,
     deterministic_subcomponent_invocation::DeterministicSubCmpInvokePass,
     loop_unroll::LoopUnrollPass, mapped_to_indexed::MappedToIndexedPass,
     simplification::SimplificationPass, unknown_index_sanitization::UnknownIndexSanitizationPass,
 };
-use crate::passes::checks::assert_unique_ids_in_circuit;
+
+use self::loop_unroll::body_extractor::{UnrolledIterLvars, ToOriginalLocation};
 
 mod conditional_flattening;
 mod simplification;
@@ -396,10 +398,29 @@ pub trait CircuitTransformationPass {
     pre_hook!(pre_hook_nop_bucket, NopBucket);
 }
 
-pub type Passes = RefCell<Vec<Box<dyn CircuitTransformationPass>>>;
+pub enum PassKind {
+    LoopUnroll,
+    Simplification,
+    ConditionalFlattening,
+    DeterministicSubCmpInvoke,
+    MappedToIndexed,
+    UnknownIndexSanitization,
+}
+
+pub struct GlobalPassData {
+    /// Created during loop unrolling, maps generated function name + Env::get_vars_sort
+    /// to location reference in the original function.
+    pub extract_func_orig_loc: HashMap<String, BTreeMap<UnrolledIterLvars, ToOriginalLocation>>,
+}
+
+impl GlobalPassData {
+    pub fn new() -> GlobalPassData {
+        GlobalPassData { extract_func_orig_loc: Default::default() }
+    }
+}
 
 pub struct PassManager {
-    passes: Passes,
+    passes: RefCell<Vec<PassKind>>,
 }
 
 impl PassManager {
@@ -407,39 +428,68 @@ impl PassManager {
         PassManager { passes: Default::default() }
     }
 
-    pub fn schedule_loop_unroll_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(LoopUnrollPass::new(prime)));
+    pub fn schedule_loop_unroll_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::LoopUnroll);
         self
     }
 
-    pub fn schedule_simplification_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(SimplificationPass::new(prime)));
+    pub fn schedule_simplification_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::Simplification);
         self
     }
 
-    pub fn schedule_conditional_flattening_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(ConditionalFlattening::new(prime)));
+    pub fn schedule_conditional_flattening_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::ConditionalFlattening);
         self
     }
 
-    pub fn schedule_deterministic_subcmp_invoke_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(DeterministicSubCmpInvokePass::new(prime)));
+    pub fn schedule_deterministic_subcmp_invoke_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::DeterministicSubCmpInvoke);
         self
     }
 
-    pub fn schedule_mapped_to_indexed_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(MappedToIndexedPass::new(prime)));
+    pub fn schedule_mapped_to_indexed_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::MappedToIndexed);
         self
     }
 
-    pub fn schedule_unknown_index_sanitization_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(UnknownIndexSanitizationPass::new(prime)));
+    pub fn schedule_unknown_index_sanitization_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::UnknownIndexSanitization);
         self
     }
 
-    pub fn transform_circuit(&self, circuit: Circuit) -> Circuit {
+    fn build_pass<'d>(
+        kind: PassKind,
+        prime: &String,
+        global_data: &'d RefCell<GlobalPassData>,
+    ) -> Box<dyn CircuitTransformationPass + 'd> {
+        match kind {
+            PassKind::LoopUnroll => Box::new(LoopUnrollPass::new(prime.clone(), global_data)),
+            PassKind::Simplification => {
+                Box::new(SimplificationPass::new(prime.clone(), global_data))
+            }
+            PassKind::ConditionalFlattening => {
+                Box::new(ConditionalFlatteningPass::new(prime.clone(), global_data))
+            }
+            PassKind::DeterministicSubCmpInvoke => {
+                Box::new(DeterministicSubCmpInvokePass::new(prime.clone(), global_data))
+            }
+            PassKind::MappedToIndexed => {
+                Box::new(MappedToIndexedPass::new(prime.clone(), global_data))
+            }
+            PassKind::UnknownIndexSanitization => {
+                Box::new(UnknownIndexSanitizationPass::new(prime.clone(), global_data))
+            }
+        }
+    }
+
+    pub fn transform_circuit(&self, circuit: Circuit, prime: &String) -> Circuit {
+        // NOTE: Used RefCell rather than a mutable reference because storing
+        //  the mutable reference in EnvRecorder was causing rustc errors.
+        let global_data = RefCell::new(GlobalPassData::new());
         let mut transformed_circuit = circuit;
-        for pass in self.passes.borrow().iter() {
+        for kind in self.passes.borrow_mut().drain(..) {
+            let pass = Self::build_pass(kind, prime, &global_data);
             if cfg!(debug_assertions) {
                 println!("Do {}...", pass.name());
             }
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index 4b4d003dd..9a3362370 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -8,18 +8,21 @@ use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::CircuitTransformationPass;
+use super::{CircuitTransformationPass, GlobalPassData};
 
-pub struct SimplificationPass {
+pub struct SimplificationPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
     compute_replacements: RefCell<BTreeMap<ComputeBucket, Value>>,
     call_replacements: RefCell<BTreeMap<CallBucket, Value>>,
+    //TODO: could use BucketId instead of cloning buckets for keys
 }
 
-impl SimplificationPass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> SimplificationPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         SimplificationPass {
+            global_data,
             memory: PassMemory::new(prime, "".to_string(), Default::default()),
             compute_replacements: Default::default(),
             call_replacements: Default::default(),
@@ -27,7 +30,7 @@ impl SimplificationPass {
     }
 }
 
-impl InterpreterObserver for SimplificationPass {
+impl InterpreterObserver for SimplificationPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -42,10 +45,11 @@ impl InterpreterObserver for SimplificationPass {
 
     fn on_compute_bucket(&self, bucket: &ComputeBucket, env: &Env) -> bool {
         let env = env.clone();
-        let interpreter = self.memory.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
         let (eval, _) = interpreter.execute_compute_bucket(bucket, env, false);
         let eval = eval.expect("Compute bucket must produce a value!");
         if !eval.is_unknown() {
+            // println!("\nCan replace {:?} with {}", bucket, eval);
             self.compute_replacements.borrow_mut().insert(bucket.clone(), eval);
             return false;
         }
@@ -82,11 +86,12 @@ impl InterpreterObserver for SimplificationPass {
 
     fn on_call_bucket(&self, bucket: &CallBucket, env: &Env) -> bool {
         let env = env.clone();
-        let interpreter = self.memory.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
         let (eval, _) = interpreter.execute_call_bucket(bucket, env, false);
         if let Some(eval) = eval {
             // Call buckets may not return a value directly
             if !eval.is_unknown() {
+                // println!("\nCan replace {:?} with {}", bucket, eval);
                 self.call_replacements.borrow_mut().insert(bucket.clone(), eval);
                 return false;
             }
@@ -115,7 +120,7 @@ impl InterpreterObserver for SimplificationPass {
     }
 }
 
-impl CircuitTransformationPass for SimplificationPass {
+impl CircuitTransformationPass for SimplificationPass<'_> {
     fn name(&self) -> &str {
         "SimplificationPass"
     }
@@ -164,6 +169,6 @@ impl CircuitTransformationPass for SimplificationPass {
 
     fn pre_hook_template(&self, template: &TemplateCode) {
         self.memory.set_scope(template);
-        self.memory.run_template(self, template);
+        self.memory.run_template(self.global_data, self, template);
     }
 }
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index fd399df9c..c4f3ac18e 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -14,7 +14,7 @@ use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::operations::compute_operation;
 use crate::bucket_interpreter::R;
 use crate::bucket_interpreter::value::Value::{KnownU32, KnownBigInt};
-use crate::passes::CircuitTransformationPass;
+use super::{CircuitTransformationPass, GlobalPassData};
 
 struct ZeroingInterpreter<'a> {
     pub constant_fields: &'a Vec<String>,
@@ -81,7 +81,8 @@ impl<'a> ZeroingInterpreter<'a> {
     }
 }
 
-pub struct UnknownIndexSanitizationPass {
+pub struct UnknownIndexSanitizationPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
     memory: PassMemory,
     load_replacements: RefCell<BTreeMap<LoadBucket, Range<usize>>>,
@@ -91,9 +92,10 @@ pub struct UnknownIndexSanitizationPass {
 /**
  * The goal of this pass is to
  */
-impl UnknownIndexSanitizationPass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> UnknownIndexSanitizationPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         UnknownIndexSanitizationPass {
+            global_data,
             memory: PassMemory::new(prime, "".to_string(), Default::default()),
             load_replacements: Default::default(),
             store_replacements: Default::default(),
@@ -145,7 +147,7 @@ impl UnknownIndexSanitizationPass {
         env: &Env,
     ) -> bool {
         let mem = &self.memory;
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = mem.build_interpreter(self.global_data, self);
 
         let resolved_addr = match location {
             LocationRule::Indexed { location, .. } => {
@@ -164,7 +166,7 @@ impl UnknownIndexSanitizationPass {
  * - loads with a function call that returns the loaded value
  * - stores with a function call that performs the store
  */
-impl InterpreterObserver for UnknownIndexSanitizationPass {
+impl InterpreterObserver for UnknownIndexSanitizationPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -246,7 +248,7 @@ impl InterpreterObserver for UnknownIndexSanitizationPass {
     }
 }
 
-impl CircuitTransformationPass for UnknownIndexSanitizationPass {
+impl CircuitTransformationPass for UnknownIndexSanitizationPass<'_> {
     fn name(&self) -> &str {
         "UnknownIndexSanitizationPass"
     }
@@ -298,6 +300,6 @@ impl CircuitTransformationPass for UnknownIndexSanitizationPass {
 
     fn pre_hook_template(&self, template: &TemplateCode) {
         self.memory.set_scope(template);
-        self.memory.run_template(self, template);
+        self.memory.run_template(self.global_data, self, template);
     }
 }
diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index ed4297660..6639bba89 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -12,7 +12,7 @@ use crate::llvm_elements::instructions::{
 use crate::llvm_elements::types::{bigint_type, bool_type, i32_type, void_type};
 
 use super::instructions::create_array_copy;
-use super::instructions::{create_inv, create_return_void};
+use super::instructions::{create_inv, create_return_void, pointer_cast};
 use super::values::zero;
 
 pub const FR_ADD_FN_NAME: &str = "fr_add";
@@ -40,8 +40,10 @@ pub const FR_LOR_FN_NAME: &str = "fr_logic_or";
 pub const FR_LNOT_FN_NAME: &str = "fr_logic_not";
 pub const FR_ADDR_CAST_FN_NAME: &str = "fr_cast_to_addr";
 pub const FR_ARRAY_COPY_FN_NAME: &str = "fr_copy_n";
-pub const FR_IDENTITY_ARR_PTR: &str = "identity_arr_ptr";
 pub const FR_INDEX_ARR_PTR: &str = "index_arr_ptr";
+pub const FR_IDENTITY_ARR_PTR: &str = "identity_arr_ptr";
+pub const FR_PTR_CAST_I32_I256: &str = "cast_ptr_i32_i256";
+pub const FR_PTR_CAST_I256_I32: &str = "cast_ptr_i256_i32";
 
 macro_rules! fr_unary_op_base {
     ($name: expr, $producer: expr, $argTy: expr, $retTy: expr) => {{
@@ -51,13 +53,13 @@ macro_rules! fr_unary_op_base {
         $producer.set_current_bb(main);
 
         let lhs = func.get_nth_param(0).unwrap();
-        lhs
+        (lhs, func)
     }};
 }
 
 macro_rules! fr_unary_op {
     ($name: expr, $producer: expr, $valTy: expr) => {{
-        fr_unary_op_base!($name, $producer, $valTy, $valTy)
+        fr_unary_op_base!($name, $producer, $valTy, $valTy).0
     }};
 }
 
@@ -253,7 +255,7 @@ fn logic_not_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
 }
 
 fn addr_cast_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
-    let arg = fr_unary_op_base!(
+    let (arg, _) = fr_unary_op_base!(
         FR_ADDR_CAST_FN_NAME,
         producer,
         bigint_type(producer),
@@ -285,24 +287,6 @@ fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return_void(producer);
 }
 
-fn identity_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
-    let val_type = bigint_type(producer).array_type(0).ptr_type(Default::default());
-    let func = create_function(
-        producer,
-        &None,
-        0,
-        "",
-        FR_IDENTITY_ARR_PTR,
-        val_type.fn_type(&[val_type.into()], false),
-    );
-    add_inline_attribute(producer, func);
-
-    let main = create_bb(producer, func, FR_IDENTITY_ARR_PTR);
-    producer.set_current_bb(main);
-    // Just return the parameter
-    create_return(producer, func.get_nth_param(0).unwrap());
-}
-
 fn index_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let bigint_ty = bigint_type(producer);
     let ret_ty = bigint_ty.ptr_type(Default::default());
@@ -329,6 +313,31 @@ fn index_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return(producer, gep.into_pointer_value());
 }
 
+fn identity_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let ty = bigint_type(producer).array_type(0).ptr_type(Default::default());
+    let (res, func) = fr_unary_op_base!(FR_IDENTITY_ARR_PTR, producer, ty, ty);
+    add_inline_attribute(producer, func);
+    // Just return the parameter
+    create_return(producer, res);
+}
+
+fn ptr_cast_i32_i256_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let ty_32 = i32_type(producer).ptr_type(Default::default());
+    let ty_256 = bigint_type(producer).ptr_type(Default::default());
+    let (res, func) = fr_unary_op_base!(FR_PTR_CAST_I32_I256, producer, ty_32, ty_256);
+    add_inline_attribute(producer, func);
+    // Cast the i32* to i256* and return
+    create_return(producer, pointer_cast(producer, res.into_pointer_value(), ty_256));
+}
+fn ptr_cast_i256_i32_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let ty_32 = i32_type(producer).ptr_type(Default::default());
+    let ty_256 = bigint_type(producer).ptr_type(Default::default());
+    let (res, func) = fr_unary_op_base!(FR_PTR_CAST_I256_I32, producer, ty_256, ty_32);
+    add_inline_attribute(producer, func);
+    // Cast the i256* to i32* and return
+    create_return(producer, pointer_cast(producer, res.into_pointer_value(), ty_32));
+}
+
 pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     add_fn(producer);
     sub_fn(producer);
@@ -354,7 +363,9 @@ pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     logic_not_fn(producer);
     addr_cast_fn(producer);
     array_copy_fn(producer);
-    identity_arr_ptr_fn(producer);
     index_arr_ptr_fn(producer);
+    identity_arr_ptr_fn(producer);
+    ptr_cast_i32_i256_fn(producer);
+    ptr_cast_i256_i32_fn(producer);
     pow_fn(producer); //uses functions generated by mul_fn & lt_fn
 }
diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index c915cf4f0..4fa04e0b9 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -135,33 +135,29 @@ impl<'ctx, 'prod> LLVMIRProducer<'ctx> for FunctionLLVMIRProducer<'ctx, 'prod> {
 
 struct ExtractedFunctionCtx<'a> {
     current_function: FunctionValue<'a>,
-    lvars: PointerValue<'a>,
-    signals: Option<PointerValue<'a>>,
-    other: Vec<PointerValue<'a>>,
+    // NOTE: The 'lvars' [0 x i256]* parameter must always be present (at position 0).
+    //  The 'signals' [0 x i256]* parameter (at position 1) is optional (to allow
+    //  this to handle the generated array index load functions for the unroller).
+    args: Vec<PointerValue<'a>>,
 }
 
 impl<'a> ExtractedFunctionCtx<'a> {
     fn new(current_function: FunctionValue<'a>) -> Self {
-        // NOTE: The 'lvars' [0 x i256]* parameter must always be present.
-        //  The 'signals' [0 x i256]* parameter is optional (to allow this to
-        //  handle the generated array index load functions for the unroller).
         ExtractedFunctionCtx {
             current_function,
-            lvars: current_function
-                .get_nth_param(0)
-                .expect("Function must have at least 1 argument for lvar array!")
-                .into_pointer_value(),
-            signals: current_function.get_nth_param(1).map(|x| x.into_pointer_value()),
-            other: current_function
+            args: current_function
                 .get_param_iter()
-                .skip(2)
                 .map(|x| x.into_pointer_value())
                 .collect::<Vec<_>>(),
         }
     }
 
+    fn get_lvars_ptr(&self) -> PointerValue<'a> {
+        *self.args.get(0).expect("Function must have at least 1 argument for lvar array!")
+    }
+
     fn get_signals_ptr(&self) -> PointerValue<'a> {
-        self.signals.expect(
+        *self.args.get(1).expect(
             format!("No signals argument for {:?}", self.current_function.get_name()).as_str(),
         )
     }
@@ -174,11 +170,11 @@ impl<'a> BodyCtx<'a> for ExtractedFunctionCtx<'a> {
         index: IntValue<'a>,
     ) -> AnyValueEnum<'a> {
         //'gep' must read through the pointer with 0 and then index the array
-        create_gep(producer, self.lvars, &[zero(producer), index])
+        create_gep(producer, self.get_lvars_ptr(), &[zero(producer), index])
     }
 
     fn get_variable_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
-        self.lvars.into()
+        self.get_lvars_ptr().into()
     }
 }
 
@@ -191,7 +187,7 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
         //NOTE: only used by CreateCmpBucket::produce_llvm_ir
         //TODO: I think instead of ID defining an array index in the gep, it will need to define a static index
         //  in an array of subcomponents in this context (i.e. self.subcmps[id] with offsets [0,0]).
-        todo!("load_subcmp {} from {:?}", _id, self.other);
+        todo!("load_subcmp {} from {:?}", _id, self.args);
         //create_gep(producer, self.subcmps, &[zero(producer), id.into_int_value()]).into_pointer_value()
     }
 
@@ -204,7 +200,7 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
             .into_int_value()
             .get_zero_extended_constant()
             .expect("must reference a constant argument index");
-        *self.other.get(num as usize).expect("must reference a known argument index")
+        *self.args.get(num as usize).expect("must reference a known argument index")
     }
 
     fn load_subcmp_counter(
@@ -212,7 +208,7 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
         producer: &dyn LLVMIRProducer<'a>,
         _id: AnyValueEnum<'a>,
     ) -> PointerValue<'a> {
-        // Use null pointer to force StoreBucket::produce_llvm_ir to skip counter increment
+        // Use null pointer to force StoreBucket::produce_llvm_ir to skip counter increment.
         producer.context().i32_type().ptr_type(Default::default()).const_null()
     }
 
diff --git a/code_producers/src/llvm_elements/mod.rs b/code_producers/src/llvm_elements/mod.rs
index 544e49b0b..6b7cc65ed 100644
--- a/code_producers/src/llvm_elements/mod.rs
+++ b/code_producers/src/llvm_elements/mod.rs
@@ -317,7 +317,7 @@ impl<'a> LLVM<'a> {
     }
 
     pub fn write_to_file(&self, path: &str) -> Result<(), ()> {
-        // Run LLVM IR inliner for the FR_IDENTITY_ARR_PTR and FR_INDEX_ARR_PTR functions
+        // Run LLVM IR inliner for the FR_IDENTITY_ARR_PTR, FR_PTR_CAST_*, and FR_INDEX_ARR_PTR functions
         let pm = PassManager::create(());
         pm.add_always_inliner_pass();
         pm.run_on(&self.module);
@@ -328,8 +328,7 @@ impl<'a> LLVM<'a> {
         }
         // Run module verification
         self.module.verify().map_err(|llvm_err| {
-            eprintln!("Generated LLVM:");
-            self.module.print_to_stderr();
+            self.dump_module_to_stderr();
             eprintln!(
                 "{}: {}",
                 Colour::Red.paint("LLVM Module verification failed"),
@@ -365,6 +364,11 @@ impl<'a> LLVM<'a> {
             );
         })
     }
+
+    pub fn dump_module_to_stderr(&self) {
+        eprintln!("Generated LLVM:");
+        self.module.print_to_stderr();
+    }
 }
 
 pub fn run_fn_name(name: String) -> String {
diff --git a/compiler/src/circuit_design/circuit.rs b/compiler/src/circuit_design/circuit.rs
index d3ed67dc3..658ca9e25 100644
--- a/compiler/src/circuit_design/circuit.rs
+++ b/compiler/src/circuit_design/circuit.rs
@@ -62,7 +62,6 @@ impl WriteLLVMIR for Circuit {
             &self.llvm_data.variable_index_mapping,
             &self.llvm_data.component_index_mapping,
         ];
-
         for mapping in mappings {
             for range_mapping in mapping.values() {
                 for range in range_mapping.values() {
@@ -70,7 +69,6 @@ impl WriteLLVMIR for Circuit {
                 }
             }
         }
-
         for range in ranges {
             load_array_switch(producer, range);
         }
@@ -85,17 +83,17 @@ impl WriteLLVMIR for Circuit {
                 for p in &f.params {
                     // This section is a little more complicated than desired because IntType and ArrayType do
                     //  not have a common Trait that defines the `array_type` and `ptr_type` member functions.
-                    let ty = match &p.length.len() {
+                    let ty = match &p.length[..] {
                         // [] -> i256*
-                        0 => bigint_type(producer).ptr_type(Default::default()),
+                        [] => bigint_type(producer).ptr_type(Default::default()),
                         // [A] -> [A x i256]*
-                        1 => bigint_type(producer)
-                            .array_type(p.length[0] as u32)
-                            .ptr_type(Default::default()),
+                        [a] => {
+                            bigint_type(producer).array_type(*a as u32).ptr_type(Default::default())
+                        }
                         // [A,B,C,...] -> [C x [B x [A x i256]*]*]*
-                        _ => {
-                            let mut temp = bigint_type(producer).array_type(p.length[0] as u32);
-                            for size in &p.length[1..] {
+                        [a, rest @ ..] => {
+                            let mut temp = bigint_type(producer).array_type(*a as u32);
+                            for size in rest {
                                 temp = temp.array_type(*size as u32);
                             }
                             temp.ptr_type(Default::default())
@@ -145,24 +143,31 @@ impl WriteLLVMIR for Circuit {
             funcs.insert(name, function);
         }
 
-        // Code for the functions
+        // Code for the functions (except for generated functions)
+        let mut generated_functions = vec![];
         for f in &self.functions {
-            let x: Box<dyn LLVMIRProducer<'_>> = if f.header.starts_with(GENERATED_FN_PREFIX) {
-                Box::new(ExtractedFunctionLLVMIRProducer::new(producer, funcs[f.header.as_str()]))
+            if f.header.starts_with(GENERATED_FN_PREFIX) {
+                // Hold for later because the body could reference templates
+                //  and the LLVM functions for templates were not pre-defined.
+                generated_functions.push(f);
             } else {
-                Box::new(FunctionLLVMIRProducer::new(producer, funcs[f.header.as_str()]))
-            };
-            Self::manage_debug_loc_from_curr(x.as_ref(), f.as_ref());
-            f.produce_llvm_ir(x.as_ref());
+                let current_function = funcs[f.header.as_str()];
+                f.produce_llvm_ir(&FunctionLLVMIRProducer::new(producer, current_function));
+            }
         }
 
         // Code for the templates
         for t in &self.templates {
-            println!("Generating code for {}", t.header);
-            // code.append(&mut t.produce_llvm_ir(producer));
             t.produce_llvm_ir(producer);
         }
 
+        // Code for generated functions
+        for f in generated_functions {
+            assert!(f.header.starts_with(GENERATED_FN_PREFIX));
+            let current_function = funcs[f.header.as_str()];
+            f.produce_llvm_ir(&ExtractedFunctionLLVMIRProducer::new(producer, current_function));
+        }
+
         // Code for prologue
 
         None // No need to return at this level
diff --git a/compiler/src/circuit_design/function.rs b/compiler/src/circuit_design/function.rs
index cba86907e..96ce82739 100644
--- a/compiler/src/circuit_design/function.rs
+++ b/compiler/src/circuit_design/function.rs
@@ -49,6 +49,8 @@ impl ToString for FunctionCodeInfo {
 
 impl WriteLLVMIR for FunctionCodeInfo {
     fn produce_llvm_ir<'ctx, 'prod>(&self, producer: &'prod dyn LLVMIRProducer<'ctx>) -> Option<LLVMInstruction<'ctx>> {
+        println!("Generating code for {}", self.header);
+        Self::manage_debug_loc_from_curr(producer, self);
         let function = producer.current_function();
         let main = create_bb(producer, function, self.header.as_str());
         producer.set_current_bb(main);
diff --git a/compiler/src/circuit_design/template.rs b/compiler/src/circuit_design/template.rs
index 5a80126ad..c0605aed2 100644
--- a/compiler/src/circuit_design/template.rs
+++ b/compiler/src/circuit_design/template.rs
@@ -59,6 +59,7 @@ impl ToString for TemplateCodeInfo {
 
 impl WriteLLVMIR for TemplateCodeInfo {
     fn produce_llvm_ir<'ctx, 'prod>(&self, producer: &'prod dyn LLVMIRProducer<'ctx>) -> Option<LLVMInstruction<'ctx>> {
+        println!("Generating code for {}", self.header);
         let void = void_type(producer);
         let n_signals = self.number_of_inputs + self.number_of_outputs + self.number_of_intermediates;
         let template_struct = create_template_struct(producer, n_signals);

From 678c9eb581d285c746ea0c776c57e22e4c514af2 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 21 Sep 2023 10:58:56 -0500
Subject: [PATCH 26/42] formatting

---
 code_producers/src/llvm_elements/fr.rs        |  8 +-
 .../call_bucket.rs                            | 94 +++++++++++--------
 2 files changed, 64 insertions(+), 38 deletions(-)

diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index 6639bba89..f1ec940a5 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -282,7 +282,13 @@ fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let src = func.get_nth_param(0).unwrap();
     let dst = func.get_nth_param(1).unwrap();
     let len = func.get_nth_param(2).unwrap();
-    create_array_copy(producer, func, src.into_pointer_value(), dst.into_pointer_value(), len.into_int_value());
+    create_array_copy(
+        producer,
+        func,
+        src.into_pointer_value(),
+        dst.into_pointer_value(),
+        len.into_int_value(),
+    );
 
     create_return_void(producer);
 }
diff --git a/compiler/src/intermediate_representation/call_bucket.rs b/compiler/src/intermediate_representation/call_bucket.rs
index 04a79813d..f59b8b3d0 100644
--- a/compiler/src/intermediate_representation/call_bucket.rs
+++ b/compiler/src/intermediate_representation/call_bucket.rs
@@ -126,46 +126,66 @@ impl WriteLLVMIR for CallBucket {
                 format!("{}_arena", self.symbol).as_str(),
             );
 
-        // Get the offsets based on the sizes of the arguments
-        let offsets: Vec<usize> = self.argument_types.iter().scan(0, |state, arg_ty| {
-            let curr_offset = *state;
-            *state = *state + arg_ty.size;
-            Some(curr_offset)
-        }).collect();
+            // Get the offsets based on the sizes of the arguments
+            let offsets: Vec<usize> = self
+                .argument_types
+                .iter()
+                .scan(0, |state, arg_ty| {
+                    let curr_offset = *state;
+                    *state = *state + arg_ty.size;
+                    Some(curr_offset)
+                })
+                .collect();
 
-        // Copy arguments into elements of the arena by indexing order (arg 0 -> arena 0, arg 1 -> arena 1, etc)
-        for ((arg, arg_ty), offset) in self
-            .arguments
-            .iter()
-            .zip(&self.argument_types)
-            .zip(offsets)
-        {
-            let i = create_literal_u32(producer, offset as u64);
-            let ptr = create_gep(producer, arena.into_pointer_value(), &[zero(producer), i]).into_pointer_value();
-            if arg_ty.size > 1 {
-                let src_arg = match arg.as_ref() {
-                    Instruction::Load(v) => {
-                        let index = v.src.produce_llvm_ir(producer).expect("We need to produce some kind of instruction!").into_int_value();
-                        let gep = match &v.address_type {
-                            AddressType::Variable => producer.body_ctx().get_variable(producer, index),
-                            AddressType::Signal => producer.template_ctx().get_signal(producer, index),
-                            AddressType::SubcmpSignal { cmp_address, ..  } => {
-                                let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-                                let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                                create_gep(producer, subcmp, &[zero(producer), index])
+            // Copy arguments into elements of the arena by indexing order (arg 0 -> arena 0, arg 1 -> arena 1, etc)
+            for ((arg, arg_ty), offset) in
+                self.arguments.iter().zip(&self.argument_types).zip(offsets)
+            {
+                let i = create_literal_u32(producer, offset as u64);
+                let ptr = create_gep(producer, arena.into_pointer_value(), &[zero(producer), i])
+                    .into_pointer_value();
+                if arg_ty.size > 1 {
+                    let src_arg = match arg.as_ref() {
+                        Instruction::Load(v) => {
+                            let index = v
+                                .src
+                                .produce_llvm_ir(producer)
+                                .expect("We need to produce some kind of instruction!")
+                                .into_int_value();
+                            let gep = match &v.address_type {
+                                AddressType::Variable => {
+                                    producer.body_ctx().get_variable(producer, index)
+                                }
+                                AddressType::Signal => {
+                                    producer.template_ctx().get_signal(producer, index)
+                                }
+                                AddressType::SubcmpSignal { cmp_address, .. } => {
+                                    let addr = cmp_address.produce_llvm_ir(producer).expect(
+                                        "The address of a subcomponent must yield a value!",
+                                    );
+                                    let subcmp =
+                                        producer.template_ctx().load_subcmp_addr(producer, addr);
+                                    create_gep(producer, subcmp, &[zero(producer), index])
+                                }
                             }
-                        }.into_pointer_value();
-                        gep
-                    },
-                    _ => unreachable!(),
-                };
-                let len_arg = create_literal_u32(producer, arg_ty.size as u64);
-                create_call(producer, FR_ARRAY_COPY_FN_NAME, &[src_arg.into(), ptr.into(), len_arg.into()]);
-            } else {
-                let arg_load = arg.produce_llvm_ir(producer).expect("Call arguments must produce a value!");
-                create_store(producer, ptr, arg_load);
+                            .into_pointer_value();
+                            gep
+                        }
+                        _ => unreachable!(),
+                    };
+                    let len_arg = create_literal_u32(producer, arg_ty.size as u64);
+                    create_call(
+                        producer,
+                        FR_ARRAY_COPY_FN_NAME,
+                        &[src_arg.into(), ptr.into(), len_arg.into()],
+                    );
+                } else {
+                    let arg_load = arg
+                        .produce_llvm_ir(producer)
+                        .expect("Call arguments must produce a value!");
+                    create_store(producer, ptr, arg_load);
+                }
             }
-        }
 
             let arena = pointer_cast(
                 producer,

From 90d77c0c89c6bb4b50285b34ac34ad7620824f09 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 21 Sep 2023 13:35:44 -0500
Subject: [PATCH 27/42] update tests related to new JIRA issues 670, 671

---
 circom/tests/loops/inner_loops.circom | 170 +++++++++++++++++---------
 circom/tests/subcmps/mapped.circom    |   4 +-
 circom/tests/subcmps/mapped2.circom   |   4 +-
 circom/tests/subcmps/mapped3.circom   |   4 +-
 circom/tests/subcmps/mapped4.circom   |   4 +-
 circom/tests/subcmps/subcmps2.circom  |   2 +-
 6 files changed, 119 insertions(+), 69 deletions(-)

diff --git a/circom/tests/loops/inner_loops.circom b/circom/tests/loops/inner_loops.circom
index 845b1c3e3..c2cef8c42 100644
--- a/circom/tests/loops/inner_loops.circom
+++ b/circom/tests/loops/inner_loops.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
 
 template InnerLoops(n) {
     signal input a[n];
@@ -9,6 +8,13 @@ template InnerLoops(n) {
 
     for (var i = 0; i < n; i++) {
         for (var j = 0; j <= i; j++) {
+            // NOTE: When processing the outer loop, the following statement is determined NOT
+            //  safe to move into a new function since it uses 'j' which is unknown. That results
+            //  in the outer loop unrolling without extracting the body to a new function. Then
+            //  the two copies of the inner loop are processed and their bodies are extracted to
+            //  new functions and replaced with calls to those functions before unrolling. So
+            //  it ends up creating two slightly different functions for this innermost body,
+            //  one for each iteration of the outer loop (i.e. when i=0 and when i=1).
             b[i] += a[i - j];
         }
     }
@@ -20,64 +26,108 @@ component main = InnerLoops(2);
 // %lvars = { n, b[0], b[1], i, j }
 //
 //unrolled code:
-//	b[0] = b[0] + a[0 - 0 = 0];
-//	b[1] = b[1] + a[1 - 0 = 1];
-//	b[1] = b[1] + a[1 - 1 = 0];
+//	b[0] = b[0] + a[0 - 0 = 0];     //extracted function 1
+//	b[1] = b[1] + a[1 - 0 = 1];     //extracted function 2
+//	b[1] = b[1] + a[1 - 1 = 0];     //extracted function 2
 //
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %3)
+//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
+//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 1
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr3
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %7, i256 %9)
+//CHECK-NEXT:   %call.fr_cast_to_addr4 = call i32 @fr_cast_to_addr(i256 %call.fr_sub)
+//CHECK-NEXT:   %mul_addr5 = mul i32 1, %call.fr_cast_to_addr4
+//CHECK-NEXT:   %add_addr6 = add i32 %mul_addr5, 0
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 %add_addr6
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 %11)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add7 = call i256 @fr_add(i256 %14, i256 1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add7, i256* %15, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+// 
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %3)
+//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
+//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 1
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr3
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 %7)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add4 = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add4, i256* %11, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
 //
-//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//// Use the block labels to check that the loop is unrolled and check the unrolled body
-//CHECK-NOT: loop.cond{{.*}}:
-//CHECK-NOT: loop.body{{.*}}:
-//CHECK-NOT: loop.end{{.*}}:
-//CHECK:      unrolled_loop{{.*}}:
-//				// j = 0
-//CHECK-NEXT:   %[[T01:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[T01]], align 4
-//				// b[0] = b[0] + a[0]
-//CHECK-NEXT:   %[[T02:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %[[T03:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T02]], align 4
-//CHECK-NEXT:   %[[T04:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 0
-//CHECK-NEXT:   %[[T05:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T04]], align 4
-//CHECK-NEXT:   %[[T06:[[:alnum:]_.]+]] = call i256 @fr_add(i256 %{{.*}}[[T03]], i256 %{{.*}}[[T05]])
-//CHECK-NEXT:   %[[T07:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %{{.*}}[[T06]], i256* %{{.*}}[[T07]], align 4
-//				// j = 1
-//CHECK-NEXT:   %[[T08:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T08]], align 4
-//				// i = 1
-//CHECK-NEXT:   %[[T09:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T09]], align 4
-//				// j = 0
-//CHECK-NEXT:   %[[T10:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[T10]], align 4
-//				// b[1] = b[1] + a[1]
-//CHECK-NEXT:   %[[T11:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %[[T12:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T11]], align 4
-//CHECK-NEXT:   %[[T13:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 1
-//CHECK-NEXT:   %[[T14:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T13]], align 4
-//CHECK-NEXT:   %[[T15:[[:alnum:]_.]+]] = call i256 @fr_add(i256 %{{.*}}[[T12]], i256 %{{.*}}[[T14]])
-//CHECK-NEXT:   %[[T16:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T15]], i256* %{{.*}}[[T16]], align 4
-//				// j = 1
-//CHECK-NEXT:   %[[T17:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T17]], align 4
-//				// b[1] = b[1] + a[0]
-//CHECK-NEXT:   %[[T18:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %[[T19:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T18]], align 4
-//CHECK-NEXT:   %[[T20:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 0
-//CHECK-NEXT:   %[[T21:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T20]], align 4
-//CHECK-NEXT:   %[[T22:[[:alnum:]_.]+]] = call i256 @fr_add(i256 %{{.*}}[[T19]], i256 %{{.*}}[[T21]])
-//CHECK-NEXT:   %[[T23:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T22]], i256* %{{.*}}[[T23]], align 4
-//				// j = 2
-//CHECK-NEXT:   %[[T24:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T24]], align 4
-//				// i = 2
-//CHECK-NEXT:   %[[T25:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T25]], align 4
-//CHECK-NOT: loop.cond{{.*}}:
-//CHECK-NOT: loop.body{{.*}}:
-//CHECK-NOT: loop.end{{.*}}:
-//CHECK:   }
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   %6 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %7, align 4
+//CHECK-NEXT:   %8 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %8, align 4
+//CHECK-NEXT:   %9 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %9, [0 x i256]* %0, i256* %10)
+//CHECK-NEXT:   %11 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %11, [0 x i256]* %0, i256* %12)
+//CHECK-NEXT:   %13 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 2, i256* %13, align 4
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index 3d673f411..fcf5cda63 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -1,8 +1,8 @@
 pragma circom 2.0.0;
 
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
-// XFAIL:.*
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped2.circom b/circom/tests/subcmps/mapped2.circom
index bfc0b3869..41b3d479c 100644
--- a/circom/tests/subcmps/mapped2.circom
+++ b/circom/tests/subcmps/mapped2.circom
@@ -1,8 +1,8 @@
 pragma circom 2.0.0;
 
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
-// XFAIL:.*
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped3.circom b/circom/tests/subcmps/mapped3.circom
index 0f1837a7b..8b2de038b 100644
--- a/circom/tests/subcmps/mapped3.circom
+++ b/circom/tests/subcmps/mapped3.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
-// XFAIL:.*
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template ArrayOp(q) {
     signal input inp[15];
diff --git a/circom/tests/subcmps/mapped4.circom b/circom/tests/subcmps/mapped4.circom
index 903d10093..dba6889df 100644
--- a/circom/tests/subcmps/mapped4.circom
+++ b/circom/tests/subcmps/mapped4.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
-// XFAIL:.*
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template MatrixOp(q) {
     signal input inp[5][3];
diff --git a/circom/tests/subcmps/subcmps2.circom b/circom/tests/subcmps/subcmps2.circom
index a3792cdb4..23f932c8e 100644
--- a/circom/tests/subcmps/subcmps2.circom
+++ b/circom/tests/subcmps/subcmps2.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.6;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*
+// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template Sum(n) {
     signal input inp[n];

From 6ac932bcd9362e27cab392324e25b1012985e490 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Thu, 21 Sep 2023 16:03:52 -0500
Subject: [PATCH 28/42] new subcmp tests and fix a loop test output

---
 circom/tests/loops/call_inside_loop.circom | 165 +++++++++++++++--
 circom/tests/subcmps/subcmps0A.circom      | 154 +++++++++++++++
 circom/tests/subcmps/subcmps0B.circom      | 175 +++++++++++++++++
 circom/tests/subcmps/subcmps0C.circom      | 163 ++++++++++++++++
 circom/tests/subcmps/subcmps0D.circom      | 206 +++++++++++++++++++++
 5 files changed, 847 insertions(+), 16 deletions(-)
 create mode 100644 circom/tests/subcmps/subcmps0A.circom
 create mode 100644 circom/tests/subcmps/subcmps0B.circom
 create mode 100644 circom/tests/subcmps/subcmps0C.circom
 create mode 100644 circom/tests/subcmps/subcmps0D.circom

diff --git a/circom/tests/loops/call_inside_loop.circom b/circom/tests/loops/call_inside_loop.circom
index 58a92a265..912ce3287 100644
--- a/circom/tests/loops/call_inside_loop.circom
+++ b/circom/tests/loops/call_inside_loop.circom
@@ -1,8 +1,8 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL: .*
 
+//arena = { a[0], a[1], n, b, c, d, e, f, g}
 function fun(a, n, b, c, d, e, f, g) {
 	var x[5];
     for (var i = 0; i < n; i++) {
@@ -11,6 +11,8 @@ function fun(a, n, b, c, d, e, f, g) {
 	return x[0] + x[2] + x[4];
 }
 
+//signal_arena = { out, in }
+//lvars = { m, n, a[0], a[1], b[0], b[1], i }
 template CallInLoop(n, m) {
     signal input in;
     signal output out;
@@ -27,18 +29,6 @@ template CallInLoop(n, m) {
 
 component main = CallInLoop(2, 3);
 
-//// Use the block labels to check that the loop is NOT unrolled
-//CHECK-LABEL: define i256 @fun_{{[0-9]+}}
-//CHECK-SAME: (i256* %[[ARG:[0-9]+]])
-//CHECK-NOT: unrolled_loop{{.*}}:
-//CHECK: loop.cond{{.*}}:
-//CHECK: loop.body{{.*}}:
-//CHECK: loop.end{{.*}}:
-//CHECK-NOT: unrolled_loop{{.*}}:
-//CHECK:   }
-
-//signal_arena = { out, in }
-//lvars = { m, n, a[0], a[1], i, b[0], b[1] }
 //
 //     var a[2];
 //     i = 0;
@@ -54,6 +44,149 @@ component main = CallInLoop(2, 3);
 //     i = 2;
 //     out <-- b[0];
 //
-//CHECK-LABEL: define void @CallInLoop_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//CHECK: TODO: Code produced currently is incorrect! See https://veridise.atlassian.net/browse/VAN-611
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 3, i256 %1)
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %5, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %call1
+//CHECK-EMPTY: 
+//CHECK-NEXT: call1:
+//CHECK-NEXT:   %fun_0_arena = alloca [15 x i256], align 8
+//CHECK-NEXT:   %0 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 0
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   call void @fr_copy_n(i256* %1, i256* %0, i32 2)
+//CHECK-NEXT:   %2 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 2
+//CHECK-NEXT:   store i256 2, i256* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 3
+//CHECK-NEXT:   store i256 3, i256* %3, align 4
+//CHECK-NEXT:   %4 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 4
+//CHECK-NEXT:   store i256 3, i256* %4, align 4
+//CHECK-NEXT:   %5 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 5
+//CHECK-NEXT:   store i256 3, i256* %5, align 4
+//CHECK-NEXT:   %6 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 6
+//CHECK-NEXT:   store i256 3, i256* %6, align 4
+//CHECK-NEXT:   %7 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 7
+//CHECK-NEXT:   store i256 3, i256* %7, align 4
+//CHECK-NEXT:   %8 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 8
+//CHECK-NEXT:   store i256 3, i256* %8, align 4
+//CHECK-NEXT:   %9 = bitcast [15 x i256]* %fun_0_arena to i256*
+//CHECK-NEXT:   %call.fun_0 = call i256 @fun_0(i256* %9)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %11)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 4
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %call.fun_0, i256* %12, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %14, i256 1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @CallInLoop_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [7 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 2, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop6
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop6:
+//CHECK-NEXT:   %6 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %7, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %8)
+//CHECK-NEXT:   %9 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   br label %store7
+//CHECK-EMPTY: 
+//CHECK-NEXT: store7:
+//CHECK-NEXT:   %12 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %12, align 4
+//CHECK-NEXT:   br label %store8
+//CHECK-EMPTY: 
+//CHECK-NEXT: store8:
+//CHECK-NEXT:   %13 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %13, align 4
+//CHECK-NEXT:   br label %store9
+//CHECK-EMPTY: 
+//CHECK-NEXT: store9:
+//CHECK-NEXT:   %14 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %14, align 4
+//CHECK-NEXT:   br label %unrolled_loop10
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop10:
+//CHECK-NEXT:   %15 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0)
+//CHECK-NEXT:   %16 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %16, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store11
+//CHECK-EMPTY: 
+//CHECK-NEXT: store11:
+//CHECK-NEXT:   %17 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %18, i256* %19, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps0A.circom b/circom/tests/subcmps/subcmps0A.circom
new file mode 100644
index 000000000..af57e8a19
--- /dev/null
+++ b/circom/tests/subcmps/subcmps0A.circom
@@ -0,0 +1,154 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// Like SubCmps1 but simpler (no constraints and fewer operations)
+template IsZero() {
+    signal input in;
+    signal output out;
+    out <-- -in;
+}
+
+template SubCmps0A(n) {
+    signal input ins[n];
+    signal output outs[n];
+    
+    component zeros[n];
+    for (var i = 0; i < n; i++) {
+        zeros[i] = IsZero();
+        zeros[i].in <-- ins[i];     //load(fix)+store(subcmp)
+        outs[i] <-- zeros[i].out;   //load(subcmp)+store(fix)
+                                    //increment iteration variable
+    }
+}
+
+component main = SubCmps0A(2);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], 
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X5:[0-9]+]], i256* %subc_[[X6:[0-9]+]], [0 x i256]* %sub_[[X7:[0-9]+]], i256* %subc_[[X8:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X5]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X5]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %8, i256 1)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @IsZero_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %2)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %3, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps0A_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %7 = load [0 x i256]*, [0 x i256]** %6, align 8
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %7, i32 0
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %12 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 0
+//CHECK-NEXT:   %16 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %17 = load [0 x i256]*, [0 x i256]** %16, align 8
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0
+//CHECK-NEXT:   %19 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %20 = bitcast i32* %19 to i256*
+//CHECK-NEXT:   %21 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %22 = load [0 x i256]*, [0 x i256]** %21, align 8
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %22, i32 0
+//CHECK-NEXT:   %24 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %25 = bitcast i32* %24 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0, i256* %9, i256* %10, i256* %11, i256* %15, [0 x i256]* %18, i256* %20, [0 x i256]* %23, i256* %25)
+//CHECK-NEXT:   %26 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
+//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %29, i32 0, i256 1
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %33 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %34 = load [0 x i256]*, [0 x i256]** %33, align 8
+//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 0
+//CHECK-NEXT:   %37 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %38 = load [0 x i256]*, [0 x i256]** %37, align 8
+//CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0
+//CHECK-NEXT:   %40 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %41 = bitcast i32* %40 to i256*
+//CHECK-NEXT:   %42 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %43 = load [0 x i256]*, [0 x i256]** %42, align 8
+//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %43, i32 0
+//CHECK-NEXT:   %45 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %46 = bitcast i32* %45 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %26, [0 x i256]* %0, i256* %30, i256* %31, i256* %32, i256* %36, [0 x i256]* %39, i256* %41, [0 x i256]* %44, i256* %46)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps0B.circom b/circom/tests/subcmps/subcmps0B.circom
new file mode 100644
index 000000000..a52c47b5a
--- /dev/null
+++ b/circom/tests/subcmps/subcmps0B.circom
@@ -0,0 +1,175 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// Like SubCmps1 but simpler (no constraints and fewer operations)
+template IsZero() {
+    signal input in;
+    signal output out;
+    out <-- -in;
+}
+
+template SubCmps0B(n) {
+    signal input ins[n];
+    signal output outs[n];
+    var temp;
+    component zeros[n];
+    for (var i = 0; i < n; i++) {
+        zeros[i] = IsZero();
+        zeros[i].in <-- ins[i];     //load(fix)+store(subcmp)
+        outs[i] <-- zeros[i].out;   //load(subcmp)+store(fix)
+        temp = zeros[i].out;
+                                    //increment iteration variable
+    }
+}
+
+component main = SubCmps0B(2);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]],
+//CHECK-SAME: i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X6:[0-9]+]], i256* %subc_[[X7:[0-9]+]], [0 x i256]* %sub_[[X8:[0-9]+]], i256* %subc_[[X9:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %subfix_[[X5]], i32 0
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %8, i256* %9, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @IsZero_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %2)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %3, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps0B_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %5 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop5
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop5:
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 1
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %13 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %14 = load [0 x i256]*, [0 x i256]** %13, align 8
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %15, i32 0, i256 0
+//CHECK-NEXT:   %17 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %18 = load [0 x i256]*, [0 x i256]** %17, align 8
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0, i256 0
+//CHECK-NEXT:   %21 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %22 = load [0 x i256]*, [0 x i256]** %21, align 8
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %22, i32 0
+//CHECK-NEXT:   %24 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %25 = bitcast i32* %24 to i256*
+//CHECK-NEXT:   %26 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %27 = load [0 x i256]*, [0 x i256]** %26, align 8
+//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %27, i32 0
+//CHECK-NEXT:   %29 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %30 = bitcast i32* %29 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %10, i256* %11, i256* %12, i256* %16, i256* %20, [0 x i256]* %23, i256* %25, [0 x i256]* %28, i256* %30)
+//CHECK-NEXT:   %31 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %32 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %33 = load [0 x i256]*, [0 x i256]** %32, align 8
+//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %33, i32 0
+//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0, i256 1
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %38 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %39 = load [0 x i256]*, [0 x i256]** %38, align 8
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %40, i32 0, i256 0
+//CHECK-NEXT:   %42 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %43 = load [0 x i256]*, [0 x i256]** %42, align 8
+//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %43, i32 0
+//CHECK-NEXT:   %45 = getelementptr [0 x i256], [0 x i256]* %44, i32 0, i256 0
+//CHECK-NEXT:   %46 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %47 = load [0 x i256]*, [0 x i256]** %46, align 8
+//CHECK-NEXT:   %48 = getelementptr [0 x i256], [0 x i256]* %47, i32 0
+//CHECK-NEXT:   %49 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %50 = bitcast i32* %49 to i256*
+//CHECK-NEXT:   %51 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %52 = load [0 x i256]*, [0 x i256]** %51, align 8
+//CHECK-NEXT:   %53 = getelementptr [0 x i256], [0 x i256]* %52, i32 0
+//CHECK-NEXT:   %54 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %55 = bitcast i32* %54 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %31, [0 x i256]* %0, i256* %35, i256* %36, i256* %37, i256* %41, i256* %45, [0 x i256]* %48, i256* %50, [0 x i256]* %53, i256* %55)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps0C.circom b/circom/tests/subcmps/subcmps0C.circom
new file mode 100644
index 000000000..bc026f3e3
--- /dev/null
+++ b/circom/tests/subcmps/subcmps0C.circom
@@ -0,0 +1,163 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template IsZero() {
+    signal input in;
+    signal output out;
+    signal temp <-- -in;
+    out <-- temp * temp;
+}
+
+template SubCmps0C(n) {
+    signal input ins[n];
+    signal output outs[n];
+
+    component zeros[n];
+    for (var i = 0; i < n; i++) {
+        zeros[i] = IsZero();
+        zeros[i].in <-- ins[i];
+        outs[i] <-- zeros[i].out;
+    }
+}
+
+component main = SubCmps0C(2);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X5:[0-9]+]], i256* %subc_[[X6:[0-9]+]], [0 x i256]* %sub_[[X7:[0-9]+]], i256* %subc_[[X8:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X5]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X5]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %8, i256 1)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @IsZero_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %2)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %3, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %5, i256 %7)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_mul, i256* %8, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps0C_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %7 = load [0 x i256]*, [0 x i256]** %6, align 8
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %7, i32 0
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %12 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 0
+//CHECK-NEXT:   %16 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %17 = load [0 x i256]*, [0 x i256]** %16, align 8
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0
+//CHECK-NEXT:   %19 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %20 = bitcast i32* %19 to i256*
+//CHECK-NEXT:   %21 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %22 = load [0 x i256]*, [0 x i256]** %21, align 8
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %22, i32 0
+//CHECK-NEXT:   %24 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %25 = bitcast i32* %24 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0, i256* %9, i256* %10, i256* %11, i256* %15, [0 x i256]* %18, i256* %20, [0 x i256]* %23, i256* %25)
+//CHECK-NEXT:   %26 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
+//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %29, i32 0, i256 1
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %33 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %34 = load [0 x i256]*, [0 x i256]** %33, align 8
+//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 0
+//CHECK-NEXT:   %37 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %38 = load [0 x i256]*, [0 x i256]** %37, align 8
+//CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0
+//CHECK-NEXT:   %40 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %41 = bitcast i32* %40 to i256*
+//CHECK-NEXT:   %42 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %43 = load [0 x i256]*, [0 x i256]** %42, align 8
+//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %43, i32 0
+//CHECK-NEXT:   %45 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %46 = bitcast i32* %45 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %26, [0 x i256]* %0, i256* %30, i256* %31, i256* %32, i256* %36, [0 x i256]* %39, i256* %41, [0 x i256]* %44, i256* %46)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps0D.circom b/circom/tests/subcmps/subcmps0D.circom
new file mode 100644
index 000000000..3dd970b99
--- /dev/null
+++ b/circom/tests/subcmps/subcmps0D.circom
@@ -0,0 +1,206 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Add() {
+    signal input in1;
+    signal input in2;
+    signal output out;
+    out <-- in1 + in2;
+}
+
+template SubCmps0D(n) {
+    signal input ins[n];
+    signal output outs[n];
+
+    component a[n];
+    for (var i = 0; i < n; i++) {
+        a[i] = Add();
+        a[i].in1 <-- ins[i];
+        a[i].in2 <-- ins[i];
+        outs[i] <-- a[i].out;
+    }
+}
+
+component main = SubCmps0D(3);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X0:[0-9]+]], i256* %fix_[[X1:[0-9]+]], i256* %subfix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X6:[0-9]+]], i256* %subc_[[X7:[0-9]+]], [0 x i256]* %sub_[[X8:[0-9]+]], i256* %subc_[[X9:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X0]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %subfix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %subfix_[[X5]], i32 0
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %10 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   store i256 %9, i256* %10, align 4
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %12, i256 1)
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %13, align 4
+//CHECK-NEXT:   br label %return7
+//CHECK-EMPTY: 
+//CHECK-NEXT: return7:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Add_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %2, i256 %4)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps0D_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [3 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   %3 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   %4 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %5 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %6 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 1
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %12 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 2
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %18 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %19 = load [0 x i256]*, [0 x i256]** %18, align 8
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %20, i32 0, i256 0
+//CHECK-NEXT:   %22 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %23 = load [0 x i256]*, [0 x i256]** %22, align 8
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0
+//CHECK-NEXT:   %25 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %26 = bitcast i32* %25 to i256*
+//CHECK-NEXT:   %27 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
+//CHECK-NEXT:   %30 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %31 = bitcast i32* %30 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %10, i256* %11, i256* %15, i256* %16, i256* %17, i256* %21, [0 x i256]* %24, i256* %26, [0 x i256]* %29, i256* %31)
+//CHECK-NEXT:   %32 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %33 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %34 = load [0 x i256]*, [0 x i256]** %33, align 8
+//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 1
+//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %38 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %39 = load [0 x i256]*, [0 x i256]** %38, align 8
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %40, i32 0, i256 2
+//CHECK-NEXT:   %42 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %44 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %45 = load [0 x i256]*, [0 x i256]** %44, align 8
+//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %45, i32 0
+//CHECK-NEXT:   %47 = getelementptr [0 x i256], [0 x i256]* %46, i32 0, i256 0
+//CHECK-NEXT:   %48 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %49 = load [0 x i256]*, [0 x i256]** %48, align 8
+//CHECK-NEXT:   %50 = getelementptr [0 x i256], [0 x i256]* %49, i32 0
+//CHECK-NEXT:   %51 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %52 = bitcast i32* %51 to i256*
+//CHECK-NEXT:   %53 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %54 = load [0 x i256]*, [0 x i256]** %53, align 8
+//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %54, i32 0
+//CHECK-NEXT:   %56 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %57 = bitcast i32* %56 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %32, [0 x i256]* %0, i256* %36, i256* %37, i256* %41, i256* %42, i256* %43, i256* %47, [0 x i256]* %50, i256* %52, [0 x i256]* %55, i256* %57)
+//CHECK-NEXT:   %58 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %59 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %60 = load [0 x i256]*, [0 x i256]** %59, align 8
+//CHECK-NEXT:   %61 = getelementptr [0 x i256], [0 x i256]* %60, i32 0
+//CHECK-NEXT:   %62 = getelementptr [0 x i256], [0 x i256]* %61, i32 0, i256 1
+//CHECK-NEXT:   %63 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %64 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %65 = load [0 x i256]*, [0 x i256]** %64, align 8
+//CHECK-NEXT:   %66 = getelementptr [0 x i256], [0 x i256]* %65, i32 0
+//CHECK-NEXT:   %67 = getelementptr [0 x i256], [0 x i256]* %66, i32 0, i256 2
+//CHECK-NEXT:   %68 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %69 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %70 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %71 = load [0 x i256]*, [0 x i256]** %70, align 8
+//CHECK-NEXT:   %72 = getelementptr [0 x i256], [0 x i256]* %71, i32 0
+//CHECK-NEXT:   %73 = getelementptr [0 x i256], [0 x i256]* %72, i32 0, i256 0
+//CHECK-NEXT:   %74 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %75 = load [0 x i256]*, [0 x i256]** %74, align 8
+//CHECK-NEXT:   %76 = getelementptr [0 x i256], [0 x i256]* %75, i32 0
+//CHECK-NEXT:   %77 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %78 = bitcast i32* %77 to i256*
+//CHECK-NEXT:   %79 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %80 = load [0 x i256]*, [0 x i256]** %79, align 8
+//CHECK-NEXT:   %81 = getelementptr [0 x i256], [0 x i256]* %80, i32 0
+//CHECK-NEXT:   %82 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %83 = bitcast i32* %82 to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %58, [0 x i256]* %0, i256* %62, i256* %63, i256* %67, i256* %68, i256* %69, i256* %73, [0 x i256]* %76, i256* %78, [0 x i256]* %81, i256* %83)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }

From c6f1d1f4dc02c25177873e05e6155e86436a5ba1 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Fri, 22 Sep 2023 21:58:21 -0500
Subject: [PATCH 29/42] handle buckets that aren't used by every iteration, add
 tests

---
 circom/tests/loops/inner_conditional_1.circom |  28 +++
 circom/tests/loops/inner_conditional_2.circom | 218 ++++++++++++++++++
 circom/tests/loops/inner_conditional_3.circom |  27 +++
 circom/tests/loops/inner_conditional_6.circom |  32 +++
 .../src/passes/loop_unroll/body_extractor.rs  |  83 +++++--
 code_producers/src/llvm_elements/fr.rs        |  30 +++
 6 files changed, 393 insertions(+), 25 deletions(-)
 create mode 100644 circom/tests/loops/inner_conditional_1.circom
 create mode 100644 circom/tests/loops/inner_conditional_2.circom
 create mode 100644 circom/tests/loops/inner_conditional_3.circom
 create mode 100644 circom/tests/loops/inner_conditional_6.circom

diff --git a/circom/tests/loops/inner_conditional_1.circom b/circom/tests/loops/inner_conditional_1.circom
new file mode 100644
index 000000000..3a4df079a
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_1.circom
@@ -0,0 +1,28 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// IF condition can be known via in-place unrolling but not when body is extracted to a new function
+template InnerConditional1(N) {
+    signal output out;
+
+    var acc = 0;
+    for (var i = 1; i <= N; i++) {
+        if (i < 5) {
+            acc += i;
+        } else {
+            acc -= i;
+        }
+    }
+
+    out <-- acc;
+}
+
+component main = InnerConditional1(10);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//TODO: add more checks, pending https://veridise.atlassian.net/browse/VAN-676
+
+//CHECK-LABEL: define void @InnerConditional1_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//TODO: add more checks, pending https://veridise.atlassian.net/browse/VAN-676
diff --git a/circom/tests/loops/inner_conditional_2.circom b/circom/tests/loops/inner_conditional_2.circom
new file mode 100644
index 000000000..5f0a7648f
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_2.circom
@@ -0,0 +1,218 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// if condition is known constant
+template InnerConditional2(N, T) {
+    signal output out;
+
+    var acc = 1;
+    for (var i = 1; i <= N; i++) {
+        if (T == 0) {
+            acc += i;
+        } else {
+            acc *= i;
+        }
+    }
+
+    out <-- acc;
+}
+
+template runner() {
+    signal output out;
+
+    component a = InnerConditional2(4, 0);
+    component b = InnerConditional2(5, 1);
+
+    out <-- a.out + b.out;
+}
+
+component main = runner();
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %4, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %7, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_mul, i256* %4, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional2_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [4 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %4, align 4
+//CHECK-NEXT:   br label %unrolled_loop5
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop5:
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   %8 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 11, i256* %9, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional2_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [4 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 5, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %4, align 4
+//CHECK-NEXT:   br label %unrolled_loop5
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop5:
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   %8 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   %9 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %9, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 120, i256* %10, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @runner_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @InnerConditional2_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %3 = load [0 x i256]*, [0 x i256]** %2, align 8
+//CHECK-NEXT:   call void @InnerConditional2_0_run([0 x i256]* %3)
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %4 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @InnerConditional2_1_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   %5 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %6 = load [0 x i256]*, [0 x i256]** %5, align 8
+//CHECK-NEXT:   call void @InnerConditional2_1_run([0 x i256]* %6)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 0
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %11 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %12 = load [0 x i256]*, [0 x i256]** %11, align 8
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %12, i32 0, i32 0
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 %14)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_3.circom b/circom/tests/loops/inner_conditional_3.circom
new file mode 100644
index 000000000..50ce75303
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_3.circom
@@ -0,0 +1,27 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL: .*    // pending https://veridise.atlassian.net/browse/VAN-677
+
+// if condition is NOT known
+template InnerConditional3(N) {
+    signal output out;
+    signal input in;
+
+    var acc = 0;
+    for (var i = 1; i <= N; i++) {
+        if (in == 0) {
+            acc += i;
+        } else {
+            acc -= i;
+        }
+    }
+
+    out <-- acc;
+}
+
+component main = InnerConditional3(3);
+
+//CHECK-LABEL: define void @InnerConditional3{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK: TODO
diff --git a/circom/tests/loops/inner_conditional_6.circom b/circom/tests/loops/inner_conditional_6.circom
new file mode 100644
index 000000000..bf378311e
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_6.circom
@@ -0,0 +1,32 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL: .*    // pending https://veridise.atlassian.net/browse/VAN-677
+
+// if condition is NOT known, arrays used inside indexed on iteration variable
+// UPDATE: the Circom compiler does not allow the commented-out block below
+template InnerConditional6(N) {
+    signal output out[N];
+    signal input in[N];
+
+    for (var i = 0; i < N; i++) {
+        // if (in[i] == 0) {
+        //     out[i] <-- 999;
+        // } else {
+        //     out[i] <-- 888;
+        // }
+        var x;
+        if (in[i] == 0) {
+            x = 999;
+        } else {
+            x = 888;
+        }
+        out[i] <-- x;
+    }
+}
+
+component main = InnerConditional6(4);
+
+//CHECK-LABEL: define void @InnerConditional6{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK: TODO
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index 06b78b54c..741333fbf 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -1,7 +1,7 @@
 use std::cell::{RefCell, Ref};
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::vec;
-use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR, FR_PTR_CAST_I32_I256};
+use code_producers::llvm_elements::fr::*;
 use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
 use compiler::hir::very_concrete_program::Param;
 use compiler::intermediate_representation::{
@@ -85,17 +85,21 @@ impl ExtraArgsResult {
     fn get_passing_refs_for_itr(
         &self,
         iter_num: usize,
-    ) -> Vec<(&(AddressType, AddressOffset), ArgIndex)> {
+    ) -> Vec<(&Option<(AddressType, AddressOffset)>, ArgIndex)> {
         self.bucket_to_itr_to_ref
             .iter()
-            .map(|(k, v)| (v[iter_num].as_ref().unwrap(), self.bucket_to_args[k]))
+            .map(|(k, v)| (&v[iter_num], self.bucket_to_args[k]))
             .collect()
     }
 
     fn get_reverse_passing_refs_for_itr(&self, iter_num: usize) -> ToOriginalLocation {
         self.bucket_to_itr_to_ref.iter().fold(ToOriginalLocation::new(), |mut acc, (k, v)| {
-            let (addr_ty, addr_offset) = v[iter_num].as_ref().unwrap();
-            acc.insert(self.bucket_to_args[k].get_signal_idx(), (addr_ty.clone(), *addr_offset));
+            if let Some((addr_ty, addr_offset)) = v[iter_num].as_ref() {
+                acc.insert(
+                    self.bucket_to_args[k].get_signal_idx(),
+                    (addr_ty.clone(), *addr_offset),
+                );
+            }
             acc
         })
     }
@@ -139,27 +143,41 @@ impl LoopBodyExtractor {
             // Parameter for signals/arena
             args[1] = Self::new_storage_ptr_ref(bucket, AddressType::Signal);
             // Additional parameters for subcmps and variant array indexing within the loop
-            for ((at, val), ai) in extra_arg_info.get_passing_refs_for_itr(iter_num) {
-                match ai {
-                    ArgIndex::Signal(signal) => {
-                        args[signal] = Self::new_indexed_storage_ptr_ref(bucket, at.clone(), *val)
-                    }
-                    ArgIndex::SubCmp { signal, arena, counter } => {
-                        // Pass entire subcomponent arena for calling the 'template_run' function
-                        args[arena] = Self::new_storage_ptr_ref(bucket, at.clone());
-                        // Pass specific signal referenced
-                        args[signal] = Self::new_indexed_storage_ptr_ref(bucket, at.clone(), *val);
-                        // Pass subcomponent counter reference
-                        if let AddressType::SubcmpSignal { cmp_address, .. } = &at {
-                            //TODO: may only need to add this when is_output=true but have to skip adding the Param too in that case.
-                            args[counter] = Self::new_subcmp_counter_storage_ptr_ref(
-                                bucket,
-                                cmp_address.clone(),
-                            );
-                        } else {
-                            unreachable!()
+            for (loc, ai) in extra_arg_info.get_passing_refs_for_itr(iter_num) {
+                match loc {
+                    None => match ai {
+                        ArgIndex::Signal(signal) => {
+                            args[signal] = Self::new_null_ptr(bucket, FR_NULL_I256_PTR);
                         }
-                    }
+                        ArgIndex::SubCmp { signal, arena, counter } => {
+                            args[signal] = Self::new_null_ptr(bucket, FR_NULL_I256_PTR);
+                            args[arena] = Self::new_null_ptr(bucket, FR_NULL_I256_ARR_PTR);
+                            args[counter] = Self::new_null_ptr(bucket, FR_NULL_I256_PTR);
+                        }
+                    },
+                    Some((at, val)) => match ai {
+                        ArgIndex::Signal(signal) => {
+                            args[signal] =
+                                Self::new_indexed_storage_ptr_ref(bucket, at.clone(), *val)
+                        }
+                        ArgIndex::SubCmp { signal, arena, counter } => {
+                            // Pass specific signal referenced
+                            args[signal] =
+                                Self::new_indexed_storage_ptr_ref(bucket, at.clone(), *val);
+                            // Pass entire subcomponent arena for calling the 'template_run' function
+                            args[arena] = Self::new_storage_ptr_ref(bucket, at.clone());
+                            // Pass subcomponent counter reference
+                            if let AddressType::SubcmpSignal { cmp_address, .. } = &at {
+                                //TODO: may only need to add this when is_output=true but have to skip adding the Param too in that case.
+                                args[counter] = Self::new_subcmp_counter_storage_ptr_ref(
+                                    bucket,
+                                    cmp_address.clone(),
+                                );
+                            } else {
+                                unreachable!()
+                            }
+                        }
+                    },
                 }
             }
             unrolled.push(
@@ -336,6 +354,21 @@ impl LoopBodyExtractor {
         )
     }
 
+    fn new_null_ptr(bucket: &dyn ObtainMeta, null_fun: &str) -> InstructionPointer {
+        CallBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            symbol: String::from(null_fun),
+            return_info: ReturnType::Intermediate { op_aux_no: 0 },
+            arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+            argument_types: vec![], // LLVM IR generation doesn't use this field
+            arguments: vec![],
+        }
+        .allocate()
+    }
+
     fn all_same<T>(data: T) -> bool
     where
         T: Iterator,
diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index f1ec940a5..581ea1180 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -44,6 +44,17 @@ pub const FR_INDEX_ARR_PTR: &str = "index_arr_ptr";
 pub const FR_IDENTITY_ARR_PTR: &str = "identity_arr_ptr";
 pub const FR_PTR_CAST_I32_I256: &str = "cast_ptr_i32_i256";
 pub const FR_PTR_CAST_I256_I32: &str = "cast_ptr_i256_i32";
+pub const FR_NULL_I256_ARR_PTR: &str = "null_i256_arr_ptr";
+pub const FR_NULL_I256_PTR: &str = "null_i256_ptr";
+
+macro_rules! fr_nullary_op {
+    ($name: expr, $producer: expr, $retTy: expr) => {{
+        let func = create_function($producer, &None, 0, "", $name, $retTy.fn_type(&[], false));
+        let main = create_bb($producer, func, $name);
+        $producer.set_current_bb(main);
+        func
+    }};
+}
 
 macro_rules! fr_unary_op_base {
     ($name: expr, $producer: expr, $argTy: expr, $retTy: expr) => {{
@@ -335,6 +346,7 @@ fn ptr_cast_i32_i256_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     // Cast the i32* to i256* and return
     create_return(producer, pointer_cast(producer, res.into_pointer_value(), ty_256));
 }
+
 fn ptr_cast_i256_i32_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let ty_32 = i32_type(producer).ptr_type(Default::default());
     let ty_256 = bigint_type(producer).ptr_type(Default::default());
@@ -344,6 +356,22 @@ fn ptr_cast_i256_i32_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return(producer, pointer_cast(producer, res.into_pointer_value(), ty_32));
 }
 
+fn null_i256_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let base_ty = bigint_type(producer).array_type(0).ptr_type(Default::default());
+    let func = fr_nullary_op!(FR_NULL_I256_ARR_PTR, producer, base_ty);
+    add_inline_attribute(producer, func);
+    // Just return null value for the proper pointer type
+    create_return(producer, base_ty.const_null());
+}
+
+fn null_i256_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let base_ty = bigint_type(producer).ptr_type(Default::default());
+    let func = fr_nullary_op!(FR_NULL_I256_PTR, producer, base_ty);
+    add_inline_attribute(producer, func);
+    // Just return null value for the proper pointer type
+    create_return(producer, base_ty.const_null());
+}
+
 pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     add_fn(producer);
     sub_fn(producer);
@@ -373,5 +401,7 @@ pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     identity_arr_ptr_fn(producer);
     ptr_cast_i32_i256_fn(producer);
     ptr_cast_i256_i32_fn(producer);
+    null_i256_arr_ptr_fn(producer);
+    null_i256_ptr_fn(producer);
     pow_fn(producer); //uses functions generated by mul_fn & lt_fn
 }

From f0020efc167d3392e44513860cc4324bfe36ee10 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 25 Sep 2023 12:07:59 -0500
Subject: [PATCH 30/42] stabilize test output when some args are null/None

add additional test (that happened to need this)
---
 circom/tests/loops/inner_conditional_4.circom | 101 ++++++++++++++++++
 .../passes/loop_unroll/loop_env_recorder.rs   |   7 +-
 2 files changed, 105 insertions(+), 3 deletions(-)
 create mode 100644 circom/tests/loops/inner_conditional_4.circom

diff --git a/circom/tests/loops/inner_conditional_4.circom b/circom/tests/loops/inner_conditional_4.circom
new file mode 100644
index 000000000..54f13c096
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_4.circom
@@ -0,0 +1,101 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// if condition can be known via unrolling, arrays used inside indexed on iteration variable
+template InnerConditional4(N) {
+    signal output out[N];
+    signal input in;
+
+    for (var i = 0; i < N; i++) {
+        if (i < 3) {
+            out[i] <-- -in;
+        } else {
+            out[i] <-- in;
+        }
+    }
+}
+
+component main = InnerConditional4(6);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %1, i256 3)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 6
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %3)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %4, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 6
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional4_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 6, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %3 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %3, [0 x i256]* %0, i256* %4, i256* null)
+//CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0, i256* %6, i256* null)
+//CHECK-NEXT:   %7 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* null)
+//CHECK-NEXT:   %9 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* null, i256* %10)
+//CHECK-NEXT:   %11 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %11, [0 x i256]* %0, i256* null, i256* %12)
+//CHECK-NEXT:   %13 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %13, [0 x i256]* %0, i256* null, i256* %14)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 1dad55b28..3183503b8 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -1,5 +1,5 @@
 use std::cell::{RefCell, Ref};
-use std::collections::HashMap;
+use std::collections::BTreeMap;
 use std::fmt::{Debug, Formatter};
 use indexmap::IndexMap;
 use compiler::intermediate_representation::BucketId;
@@ -43,7 +43,8 @@ pub struct EnvRecorder<'a, 'd> {
     //  the main interpreter while we also need to mutate these internal structures.
     current_iter_num: RefCell<usize>,
     safe_to_move: RefCell<bool>,
-    vals_per_iteration: RefCell<HashMap<usize, VariableValues<'a>>>, // key is iteration number
+    //NOTE: use BTreeMap instead of HashMap for consistent ordering of args in test cases
+    vals_per_iteration: RefCell<BTreeMap<usize, VariableValues<'a>>>, // key is iteration number
 }
 
 impl Debug for EnvRecorder<'_, '_> {
@@ -69,7 +70,7 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
         }
     }
 
-    pub fn get_vals_per_iter(&self) -> Ref<HashMap<usize, VariableValues<'a>>> {
+    pub fn get_vals_per_iter(&self) -> Ref<BTreeMap<usize, VariableValues<'a>>> {
         self.vals_per_iteration.borrow()
     }
 

From 776c8325bcdabbd0b6eb9550a246dd41970f1c7d Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 25 Sep 2023 14:10:00 -0500
Subject: [PATCH 31/42] add more IF statement inside a loop test cases

---
 circom/tests/loops/inner_conditional_1.circom |  98 ++++++-
 circom/tests/loops/inner_conditional_5.circom | 194 +++++++++++++
 circom/tests/loops/inner_conditional_7.circom | 269 ++++++++++++++++++
 circom/tests/loops/inner_conditional_8.circom | 175 ++++++++++++
 circom/tests/loops/inner_conditional_9.circom | 178 ++++++++++++
 5 files changed, 911 insertions(+), 3 deletions(-)
 create mode 100644 circom/tests/loops/inner_conditional_5.circom
 create mode 100644 circom/tests/loops/inner_conditional_7.circom
 create mode 100644 circom/tests/loops/inner_conditional_8.circom
 create mode 100644 circom/tests/loops/inner_conditional_9.circom

diff --git a/circom/tests/loops/inner_conditional_1.circom b/circom/tests/loops/inner_conditional_1.circom
index 3a4df079a..85c98fca5 100644
--- a/circom/tests/loops/inner_conditional_1.circom
+++ b/circom/tests/loops/inner_conditional_1.circom
@@ -22,7 +22,99 @@ component main = InnerConditional1(10);
 
 //CHECK-LABEL: define void @..generated..loop.body.
 //CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
-//TODO: add more checks, pending https://veridise.atlassian.net/browse/VAN-676
-
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %1, i256 5)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %3, i256 %5)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 %10)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %11, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %13, i256 1)
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %14, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
 //CHECK-LABEL: define void @InnerConditional1_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
-//TODO: add more checks, pending https://veridise.atlassian.net/browse/VAN-676
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 10, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
+//CHECK-NEXT:   %5 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0)
+//CHECK-NEXT:   %11 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %11, [0 x i256]* %0)
+//CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0)
+//CHECK-NEXT:   %13 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %13, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 21888242871839275222246405745257275088548364400416034343698204186575808495582, i256* %14, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_5.circom b/circom/tests/loops/inner_conditional_5.circom
new file mode 100644
index 000000000..37748c52d
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_5.circom
@@ -0,0 +1,194 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// if condition is known constant, arrays used inside indexed on iteration variable
+template InnerConditional5(N, T) {
+    signal output out[N];
+
+    for (var i = 0; i < N; i++) {
+        if (T == 0) {
+            out[i] <-- 777;
+        } else {
+            out[i] <-- 999;
+        }
+    }
+}
+
+template runner() {
+    signal output out;
+
+    component a = InnerConditional5(4, 0);
+    component b = InnerConditional5(5, 1);
+
+    out <-- a.out[1] + b.out[0];
+}
+
+component main = runner();
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 777, i256* %0, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %2, i256 1)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %3, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 999, i256* %0, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %2, i256 1)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %3, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional5_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional5_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 5, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %8, [0 x i256]* %0, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @runner_2_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @InnerConditional5_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %3 = load [0 x i256]*, [0 x i256]** %2, align 8
+//CHECK-NEXT:   call void @InnerConditional5_0_run([0 x i256]* %3)
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %4 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @InnerConditional5_1_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   %5 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %6 = load [0 x i256]*, [0 x i256]** %5, align 8
+//CHECK-NEXT:   call void @InnerConditional5_1_run([0 x i256]* %6)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 1
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %11 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %12 = load [0 x i256]*, [0 x i256]** %11, align 8
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %12, i32 0, i32 0
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 %14)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_7.circom b/circom/tests/loops/inner_conditional_7.circom
new file mode 100644
index 000000000..2ce359d33
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_7.circom
@@ -0,0 +1,269 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template InnerConditional7(N) {
+    signal output out;
+
+    var a[N];
+    for (var i = 0; i < N; i++) {
+        // NOTE: When processing the outer loop, the statements indexed with 'j' are determined
+        //  NOT safe to move into a new function since 'j' is unknown. That results in the outer
+        //  loop unrolling without extracting the body to a new function. Then the three copies
+        //  of the inner loop are processed and their bodies are extracted to new functions and
+        //  replaced with calls to those functions before unrolling. So it ends up creating
+        //  three slightly different functions for this innermost body, one for each iteration
+        //  of the outer loop. Within each of those functions, 'i' is a known fixed value.
+        for (var j = 0; j < N; j++) {
+            if (i > 1) {
+                a[j] += 999;
+            } else {
+                a[j] -= 111;
+            }
+        }
+    }
+
+    out <-- a[0] + a[1];
+}
+
+component main = InnerConditional7(3);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %3)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 777, i256* %4, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %6, i256 111)
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %7, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %3)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 777, i256* %4, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %6, i256 111)
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %7, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 777, i256* %2, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %4)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %6)
+//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
+//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 1
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr3
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 111)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %9, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional7_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [6 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop6
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop6:
+//CHECK-NEXT:   %6 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   %7 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
+//CHECK-NEXT:   %10 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %9, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0, i256 2
+//CHECK-NEXT:   %15 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %15, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0, i256* %14, i256* %16)
+//CHECK-NEXT:   %17 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i256 3
+//CHECK-NEXT:   %20 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %20, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %17, [0 x i256]* %0, i256* %19, i256* %21)
+//CHECK-NEXT:   %22 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %22, align 4
+//CHECK-NEXT:   %23 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %23, align 4
+//CHECK-NEXT:   %24 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %25 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %26 = getelementptr [0 x i256], [0 x i256]* %25, i32 0, i256 1
+//CHECK-NEXT:   %27 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %27, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %24, [0 x i256]* %0, i256* %26, i256* %28)
+//CHECK-NEXT:   %29 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %30 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %30, i32 0, i256 2
+//CHECK-NEXT:   %32 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %32, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %29, [0 x i256]* %0, i256* %31, i256* %33)
+//CHECK-NEXT:   %34 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %35 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 3
+//CHECK-NEXT:   %37 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %37, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %34, [0 x i256]* %0, i256* %36, i256* %38)
+//CHECK-NEXT:   %39 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %39, align 4
+//CHECK-NEXT:   %40 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %40, align 4
+//CHECK-NEXT:   %41 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %42 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %42, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %41, [0 x i256]* %0, i256* %43)
+//CHECK-NEXT:   %44 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %45 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %45, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %44, [0 x i256]* %0, i256* %46)
+//CHECK-NEXT:   %47 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %48 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %49 = getelementptr [0 x i256], [0 x i256]* %48, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %47, [0 x i256]* %0, i256* %49)
+//CHECK-NEXT:   %50 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 3, i256* %50, align 4
+//CHECK-NEXT:   br label %store7
+//CHECK-EMPTY: 
+//CHECK-NEXT: store7:
+//CHECK-NEXT:   %51 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 1554, i256* %51, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_8.circom b/circom/tests/loops/inner_conditional_8.circom
new file mode 100644
index 000000000..2f7c3f217
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_8.circom
@@ -0,0 +1,175 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// like inner_conditional_7 but with 'i' and 'j' uses swapped
+template InnerConditional8(N) {
+    signal output out;
+
+    var a[N];
+    for (var i = 0; i < N; i++) {
+        for (var j = 0; j < N; j++) {
+            if (j > 1) {
+                a[i] += 999;
+            } else {
+                a[i] -= 111;
+            }
+        }
+    }
+
+    out <-- a[0] + a[1];
+}
+
+component main = InnerConditional8(4);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %0, align 4
+//CHECK-NEXT:   br label %loop2
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop2:
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %2, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %loop.body, label %loop.end
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %4, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end:
+//CHECK-NEXT:   br label %store9
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 999)
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %9, i256 111)
+//CHECK-NEXT:   %10 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %10, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %12, i256 1)
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %13, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: store9:
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %15 = load i256, i256* %14, align 4
+//CHECK-NEXT:   %call.fr_add2 = call i256 @fr_add(i256 %15, i256 1)
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add2, i256* %16, align 4
+//CHECK-NEXT:   br label %return10
+//CHECK-EMPTY: 
+//CHECK-NEXT: return10:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional8_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [7 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %6 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   br label %unrolled_loop7
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop7:
+//CHECK-NEXT:   %7 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
+//CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 1
+//CHECK-NEXT:   %12 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %12, i32 0, i256 1
+//CHECK-NEXT:   %14 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %9, i256* %11, i256* %13, i256* %15)
+//CHECK-NEXT:   %16 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %17 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0, i256 2
+//CHECK-NEXT:   %19 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0, i256 2
+//CHECK-NEXT:   %21 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %21, i32 0, i256 2
+//CHECK-NEXT:   %23 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %16, [0 x i256]* %0, i256* %18, i256* %20, i256* %22, i256* %24)
+//CHECK-NEXT:   %25 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %26 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %26, i32 0, i256 3
+//CHECK-NEXT:   %28 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0, i256 3
+//CHECK-NEXT:   %30 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %30, i32 0, i256 3
+//CHECK-NEXT:   %32 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %32, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %25, [0 x i256]* %0, i256* %27, i256* %29, i256* %31, i256* %33)
+//CHECK-NEXT:   %34 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %35 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 4
+//CHECK-NEXT:   %37 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %37, i32 0, i256 4
+//CHECK-NEXT:   %39 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0, i256 4
+//CHECK-NEXT:   %41 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %42 = getelementptr [0 x i256], [0 x i256]* %41, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %34, [0 x i256]* %0, i256* %36, i256* %38, i256* %40, i256* %42)
+//CHECK-NEXT:   br label %store8
+//CHECK-EMPTY: 
+//CHECK-NEXT: store8:
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 3552, i256* %43, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_9.circom b/circom/tests/loops/inner_conditional_9.circom
new file mode 100644
index 000000000..345e26d0c
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_9.circom
@@ -0,0 +1,178 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template InnerConditional9(N) {
+    signal output out;
+
+    var a[N];
+    for (var i = 0; i < N; i++) {
+        if (i > 1) {
+            // runs when i > 1, i.e. i∈{2,3}
+            for (var j = 0; j < N; j++) {
+                a[i] += 999;
+            }
+        } else {
+            // runs when i <= 1, i.e. i∈{0,1}
+            for (var j = 0; j < N; j++) {
+                a[i] -= 999;
+            }
+        }
+    }
+
+    out <-- a[0] + a[1];
+}
+
+component main = InnerConditional9(4);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %loop.cond2
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store11
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %5, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %loop.body, label %loop.end
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body:
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %7, i256 999)
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %11, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end:
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond2:
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
+//CHECK-NEXT:   %call.fr_lt5 = call i1 @fr_lt(i256 %13, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt5, label %loop.body3, label %loop.end4
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body3:
+//CHECK-NEXT:   %14 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %15 = load i256, i256* %14, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %15, i256 999)
+//CHECK-NEXT:   %16 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %16, align 4
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %call.fr_add6 = call i256 @fr_add(i256 %18, i256 1)
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add6, i256* %19, align 4
+//CHECK-NEXT:   br label %loop.cond2
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end4:
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: store11:
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %21 = load i256, i256* %20, align 4
+//CHECK-NEXT:   %call.fr_add7 = call i256 @fr_add(i256 %21, i256 1)
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add7, i256* %22, align 4
+//CHECK-NEXT:   br label %return12
+//CHECK-EMPTY: 
+//CHECK-NEXT: return12:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional9_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [7 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %6 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   br label %unrolled_loop7
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop7:
+//CHECK-NEXT:   %7 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
+//CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %9, i256* %11, i256* null, i256* null)
+//CHECK-NEXT:   %12 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0, i256 2
+//CHECK-NEXT:   %15 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %15, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0, i256* %14, i256* %16, i256* null, i256* null)
+//CHECK-NEXT:   %17 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i256 3
+//CHECK-NEXT:   %20 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %20, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %17, [0 x i256]* %0, i256* null, i256* null, i256* %19, i256* %21)
+//CHECK-NEXT:   %22 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %23 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0, i256 4
+//CHECK-NEXT:   %25 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %26 = getelementptr [0 x i256], [0 x i256]* %25, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %22, [0 x i256]* %0, i256* null, i256* null, i256* %24, i256* %26)
+//CHECK-NEXT:   br label %store8
+//CHECK-EMPTY: 
+//CHECK-NEXT: store8:
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 21888242871839275222246405745257275088548364400416034343698204186575808487625, i256* %27, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }

From 89126828d8d761ec6312990ee5df51f71ace38de Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 25 Sep 2023 14:37:05 -0500
Subject: [PATCH 32/42] [VAN-677]

---
 circom/tests/loops/inner_conditional_3.circom | 90 ++++++++++++++++++-
 circom/tests/loops/inner_conditional_6.circom | 90 ++++++++++++++++++-
 circuit_passes/src/bucket_interpreter/mod.rs  |  5 +-
 3 files changed, 175 insertions(+), 10 deletions(-)

diff --git a/circom/tests/loops/inner_conditional_3.circom b/circom/tests/loops/inner_conditional_3.circom
index 50ce75303..554282a24 100644
--- a/circom/tests/loops/inner_conditional_3.circom
+++ b/circom/tests/loops/inner_conditional_3.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL: .*    // pending https://veridise.atlassian.net/browse/VAN-677
 
 // if condition is NOT known
 template InnerConditional3(N) {
@@ -22,6 +21,89 @@ template InnerConditional3(N) {
 
 component main = InnerConditional3(3);
 
-//CHECK-LABEL: define void @InnerConditional3{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//CHECK: TODO
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %1, i256 0)
+//CHECK-NEXT:   br i1 %call.fr_eq, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %3, i256 %5)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 %10)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %11, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %13, i256 1)
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %14, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional3_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
+//CHECK-NEXT:   %5 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %7 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %8, i256* %9, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_6.circom b/circom/tests/loops/inner_conditional_6.circom
index bf378311e..233e74b44 100644
--- a/circom/tests/loops/inner_conditional_6.circom
+++ b/circom/tests/loops/inner_conditional_6.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL: .*    // pending https://veridise.atlassian.net/browse/VAN-677
 
 // if condition is NOT known, arrays used inside indexed on iteration variable
 // UPDATE: Circom compiler does not allow the commented block
@@ -27,6 +26,89 @@ template InnerConditional6(N) {
 
 component main = InnerConditional6(4);
 
-//CHECK-LABEL: define void @InnerConditional6{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//CHECK: TODO
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %0, align 4
+//CHECK-NEXT:   br label %branch2
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch2:
+//CHECK-NEXT:   %1 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %2, i256 0)
+//CHECK-NEXT:   br i1 %call.fr_eq, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 999, i256* %3, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 888, i256* %4, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   br label %store7
+//CHECK-EMPTY: 
+//CHECK-NEXT: store7:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return8
+//CHECK-EMPTY: 
+//CHECK-NEXT: return8:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional6_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %3 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %3, [0 x i256]* %0, i256* %4, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7, i256* %8)
+//CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 6
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* %10, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 7
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0, i256* %13, i256* %14)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 4afef5331..2192603cf 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -430,8 +430,9 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
                 self.global_data.borrow().extract_func_orig_loc[name][&env.get_vars_sort()].clone(),
             ),
         );
-        let interp =
-            self.mem.build_interpreter_with_scope(self.global_data, self.observer, name.clone());
+        //NOTE: Do not change scope for the new interpreter because the mem lookups within
+        //  `get_write_operations_in_store_bucket` need to use the original function context.
+        let interp = self.mem.build_interpreter(self.global_data, self.observer);
         let observe = observe && !interp.observer.ignore_function_calls();
         let instructions = &env.get_function(name).body;
         unsafe {

From bb311be6a634135b0ef13af8b155151b1a443646 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 25 Sep 2023 19:45:44 -0500
Subject: [PATCH 33/42] add and improve lit test output checks

---
 circom/tests/subcmps/conv_map2idx_A.circom | 112 ++++++++++++++++-
 circom/tests/subcmps/conv_map2idx_B.circom | 135 ++++++++++++++++++++-
 circom/tests/subcmps/subcmps0A.circom      |   6 +-
 circom/tests/subcmps/subcmps0B.circom      |   8 +-
 circom/tests/subcmps/subcmps0C.circom      |   6 +-
 circom/tests/subcmps/subcmps0D.circom      |  11 +-
 6 files changed, 261 insertions(+), 17 deletions(-)

diff --git a/circom/tests/subcmps/conv_map2idx_A.circom b/circom/tests/subcmps/conv_map2idx_A.circom
index 3c4c0afc4..4ae5fc574 100644
--- a/circom/tests/subcmps/conv_map2idx_A.circom
+++ b/circom/tests/subcmps/conv_map2idx_A.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.3;
 
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template GetWeight(A) {
     signal input inp;
@@ -18,3 +18,113 @@ template ComputeValue() {
 }
 
 component main = ComputeValue();
+
+//CHECK-LABEL: define void @GetWeight_0_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [1 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 1, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [1 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [1 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %1, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_1_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [1 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 1, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [1 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [1 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 1, i256* %1, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @ComputeValue_2_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [0 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 0, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   store [0 x i256]* %1, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @ComputeValue_2_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @GetWeight_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @GetWeight_1_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %4 = load [0 x i256]*, [0 x i256]** %3, align 8
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %4, i32 0, i32 0
+//CHECK-NEXT:   store i256 888, i256* %5, align 4
+//CHECK-NEXT:   %6 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter = load i32, i32* %6, align 4
+//CHECK-NEXT:   %decrement.counter = sub i32 %load.subcmp.counter, 1
+//CHECK-NEXT:   store i32 %decrement.counter, i32* %6, align 4
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   call void @GetWeight_1_run([0 x i256]* %8)
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %9 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %10 = load [0 x i256]*, [0 x i256]** %9, align 8
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i32 0
+//CHECK-NEXT:   store i256 999, i256* %11, align 4
+//CHECK-NEXT:   %12 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter1 = load i32, i32* %12, align 4
+//CHECK-NEXT:   %decrement.counter2 = sub i32 %load.subcmp.counter1, 1
+//CHECK-NEXT:   store i32 %decrement.counter2, i32* %12, align 4
+//CHECK-NEXT:   %13 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %14 = load [0 x i256]*, [0 x i256]** %13, align 8
+//CHECK-NEXT:   call void @GetWeight_1_run([0 x i256]* %14)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/conv_map2idx_B.circom b/circom/tests/subcmps/conv_map2idx_B.circom
index 76f067882..7fe243854 100644
--- a/circom/tests/subcmps/conv_map2idx_B.circom
+++ b/circom/tests/subcmps/conv_map2idx_B.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.3;
 
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template GetWeight(A, B) {
     signal output x;    //signal index 0
@@ -21,3 +21,136 @@ template ComputeValue() {
 }
 
 component main = ComputeValue();
+
+//CHECK-LABEL: define void @GetWeight_0_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [3 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 0, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 999, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   store i256 999, i256* %3, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_1_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [3 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 0, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 888, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   store i256 888, i256* %3, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @ComputeValue_2_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [2 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 0, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [2 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @ComputeValue_2_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @GetWeight_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %3 = load [0 x i256]*, [0 x i256]** %2, align 8
+//CHECK-NEXT:   call void @GetWeight_0_run([0 x i256]* %3)
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %4 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @GetWeight_1_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   %5 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %6 = load [0 x i256]*, [0 x i256]** %5, align 8
+//CHECK-NEXT:   call void @GetWeight_1_run([0 x i256]* %6)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 2
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %10, i256* %11, align 4
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %10, i256 %12, i1* %constraint)
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %13 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %14 = load [0 x i256]*, [0 x i256]** %13, align 8
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i32 2
+//CHECK-NEXT:   %16 = load i256, i256* %15, align 4
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   store i256 %16, i256* %17, align 4
+//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %constraint1 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %16, i256 %18, i1* %constraint1)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps0A.circom b/circom/tests/subcmps/subcmps0A.circom
index af57e8a19..dd4361068 100644
--- a/circom/tests/subcmps/subcmps0A.circom
+++ b/circom/tests/subcmps/subcmps0A.circom
@@ -26,7 +26,7 @@ component main = SubCmps0A(2);
 
 //CHECK-LABEL: define void @..generated..loop.body.
 //CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], 
-//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X5:[0-9]+]], i256* %subc_[[X6:[0-9]+]], [0 x i256]* %sub_[[X7:[0-9]+]], i256* %subc_[[X8:[0-9]+]]){{.*}} {
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X1]], i256* %subc_[[X1]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
@@ -38,8 +38,8 @@ component main = SubCmps0A(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X5]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X5]])
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X1]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X1]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
diff --git a/circom/tests/subcmps/subcmps0B.circom b/circom/tests/subcmps/subcmps0B.circom
index a52c47b5a..897e4aef3 100644
--- a/circom/tests/subcmps/subcmps0B.circom
+++ b/circom/tests/subcmps/subcmps0B.circom
@@ -26,8 +26,8 @@ template SubCmps0B(n) {
 component main = SubCmps0B(2);
 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]],
-//CHECK-SAME: i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X6:[0-9]+]], i256* %subc_[[X7:[0-9]+]], [0 x i256]* %sub_[[X8:[0-9]+]], i256* %subc_[[X9:[0-9]+]]){{.*}} {
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X1]], i256* %subc_[[X1]], [0 x i256]* %sub_[[X5]], i256* %subc_[[X5]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
@@ -39,8 +39,8 @@ component main = SubCmps0B(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X1]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X1]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
diff --git a/circom/tests/subcmps/subcmps0C.circom b/circom/tests/subcmps/subcmps0C.circom
index bc026f3e3..ae2ad2597 100644
--- a/circom/tests/subcmps/subcmps0C.circom
+++ b/circom/tests/subcmps/subcmps0C.circom
@@ -25,7 +25,7 @@ component main = SubCmps0C(2);
 
 //CHECK-LABEL: define void @..generated..loop.body.
 //CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
-//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X5:[0-9]+]], i256* %subc_[[X6:[0-9]+]], [0 x i256]* %sub_[[X7:[0-9]+]], i256* %subc_[[X8:[0-9]+]]){{.*}} {
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X1]], i256* %subc_[[X1]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
@@ -37,8 +37,8 @@ component main = SubCmps0C(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X5]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X5]])
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X1]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X1]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
diff --git a/circom/tests/subcmps/subcmps0D.circom b/circom/tests/subcmps/subcmps0D.circom
index 3dd970b99..5727c4449 100644
--- a/circom/tests/subcmps/subcmps0D.circom
+++ b/circom/tests/subcmps/subcmps0D.circom
@@ -25,7 +25,8 @@ template SubCmps0D(n) {
 component main = SubCmps0D(3);
 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X0:[0-9]+]], i256* %fix_[[X1:[0-9]+]], i256* %subfix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X6:[0-9]+]], i256* %subc_[[X7:[0-9]+]], [0 x i256]* %sub_[[X8:[0-9]+]], i256* %subc_[[X9:[0-9]+]]){{.*}} {
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X0:[0-9]+]], i256* %fix_[[X1:[0-9]+]], i256* %subfix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X2]], i256* %subc_[[X2]], [0 x i256]* %sub_[[X5]], i256* %subc_[[X5]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
@@ -37,8 +38,8 @@ component main = SubCmps0D(3);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
-//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X2]], i32 0
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X2]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
@@ -49,8 +50,8 @@ component main = SubCmps0D(3);
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
-//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %sub_[[X2]], i32 0
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X2]])
 //CHECK-NEXT:   br label %store5
 //CHECK-EMPTY: 
 //CHECK-NEXT: store5:

From bf59c04fe9361fa3174541eb4a033624941e4877 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 25 Sep 2023 21:55:18 -0500
Subject: [PATCH 34/42] fix build, happens to be [VAN-673]

---
 circom/tests/subcmps/subcmps0A.circom         | 102 +++++-----
 circom/tests/subcmps/subcmps0B.circom         | 120 +++++------
 circom/tests/subcmps/subcmps0C.circom         | 102 +++++-----
 circom/tests/subcmps/subcmps0D.circom         | 191 ++++++++----------
 .../src/passes/loop_unroll/body_extractor.rs  |  27 ++-
 .../loop_unroll/extracted_location_updater.rs |  38 ++--
 6 files changed, 267 insertions(+), 313 deletions(-)

diff --git a/circom/tests/subcmps/subcmps0A.circom b/circom/tests/subcmps/subcmps0A.circom
index dd4361068..4f33801b3 100644
--- a/circom/tests/subcmps/subcmps0A.circom
+++ b/circom/tests/subcmps/subcmps0A.circom
@@ -25,8 +25,8 @@ template SubCmps0A(n) {
 component main = SubCmps0A(2);
 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], 
-//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X1]], i256* %subc_[[X1]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
@@ -38,8 +38,8 @@ component main = SubCmps0A(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X1]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X1]])
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
@@ -86,67 +86,57 @@ component main = SubCmps0A(2);
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
-//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
-//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   %[[T01:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %[[T01]], align 4
 //CHECK-NEXT:   br label %create_cmp2
 //CHECK-EMPTY: 
 //CHECK-NEXT: create_cmp2:
-//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
-//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %2)
-//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
-//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   %[[T02:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T02]])
+//CHECK-NEXT:   %[[T03:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T03]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   %[[T04:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %[[T04]], align 4
 //CHECK-NEXT:   br label %unrolled_loop4
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop4:
-//CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %6 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %7 = load [0 x i256]*, [0 x i256]** %6, align 8
-//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %7, i32 0
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %12 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
-//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 0
-//CHECK-NEXT:   %16 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %17 = load [0 x i256]*, [0 x i256]** %16, align 8
-//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0
-//CHECK-NEXT:   %19 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %20 = bitcast i32* %19 to i256*
-//CHECK-NEXT:   %21 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %22 = load [0 x i256]*, [0 x i256]** %21, align 8
-//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %22, i32 0
-//CHECK-NEXT:   %24 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %25 = bitcast i32* %24 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0, i256* %9, i256* %10, i256* %11, i256* %15, [0 x i256]* %18, i256* %20, [0 x i256]* %23, i256* %25)
-//CHECK-NEXT:   %26 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %27 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
-//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
-//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %29, i32 0, i256 1
-//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %33 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %34 = load [0 x i256]*, [0 x i256]** %33, align 8
-//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0
-//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 0
-//CHECK-NEXT:   %37 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %38 = load [0 x i256]*, [0 x i256]** %37, align 8
-//CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0
-//CHECK-NEXT:   %40 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %41 = bitcast i32* %40 to i256*
-//CHECK-NEXT:   %42 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %43 = load [0 x i256]*, [0 x i256]** %42, align 8
-//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %43, i32 0
-//CHECK-NEXT:   %45 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %46 = bitcast i32* %45 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %26, [0 x i256]* %0, i256* %30, i256* %31, i256* %32, i256* %36, [0 x i256]* %39, i256* %41, [0 x i256]* %44, i256* %46)
+//CHECK-NEXT:   %[[T05:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T06:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T07:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T06]], align 8
+//CHECK-NEXT:   %[[T08:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T07]], i32 0
+//CHECK-NEXT:   %[[T09:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T08]], i32 0, i256 1
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T13:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T12]], align 8
+//CHECK-NEXT:   %[[T14:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T13]], i32 0
+//CHECK-NEXT:   %[[T15:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T14]], i32 0, i256 0
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T17:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T16]], align 8
+//CHECK-NEXT:   %[[T18:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T17]], i32 0
+//CHECK-NEXT:   %[[T19:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T20:[0-9]+]] = bitcast i32* %[[T19]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T05]], [0 x i256]* %0, i256* %[[T09]], i256* %[[T10]], i256* %[[T11]], i256* %[[T15]], [0 x i256]* %[[T18]], i256* %[[T20]])
+//CHECK-NEXT:   %[[T21:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T22:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T23:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T22]], align 8
+//CHECK-NEXT:   %[[T24:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T23]], i32 0
+//CHECK-NEXT:   %[[T25:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T24]], i32 0, i256 1
+//CHECK-NEXT:   %[[T26:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T27:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T28:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T29:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T28]], align 8
+//CHECK-NEXT:   %[[T30:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T29]], i32 0
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T30]], i32 0, i256 0
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T33:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T32]], align 8
+//CHECK-NEXT:   %[[T34:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T33]], i32 0
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T36:[0-9]+]] = bitcast i32* %[[T35]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T21]], [0 x i256]* %0, i256* %[[T25]], i256* %[[T26]], i256* %[[T27]], i256* %[[T31]], [0 x i256]* %[[T34]], i256* %[[T36]])
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/subcmps/subcmps0B.circom b/circom/tests/subcmps/subcmps0B.circom
index 897e4aef3..f858f60e8 100644
--- a/circom/tests/subcmps/subcmps0B.circom
+++ b/circom/tests/subcmps/subcmps0B.circom
@@ -27,7 +27,7 @@ component main = SubCmps0B(2);
 
 //CHECK-LABEL: define void @..generated..loop.body.
 //CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
-//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X1]], i256* %subc_[[X1]], [0 x i256]* %sub_[[X5]], i256* %subc_[[X5]]){{.*}} {
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X5]], i256* %subc_[[X5]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
@@ -39,8 +39,8 @@ component main = SubCmps0B(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X1]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X1]])
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X5]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X5]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
@@ -94,80 +94,70 @@ component main = SubCmps0B(2);
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
-//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
-//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   %[[T01:[0-9]+]] = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %[[T01]], align 4
 //CHECK-NEXT:   br label %create_cmp2
 //CHECK-EMPTY: 
 //CHECK-NEXT: create_cmp2:
-//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
-//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %2)
-//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
-//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   %[[T02:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T02]])
+//CHECK-NEXT:   %[[T03:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T03]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   %[[T04:[0-9]+]] = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %[[T04]], align 4
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
-//CHECK-NEXT:   %5 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   %[[T05:[0-9]+]] = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %[[T05]], align 4
 //CHECK-NEXT:   br label %unrolled_loop5
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop5:
-//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 1
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %13 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %14 = load [0 x i256]*, [0 x i256]** %13, align 8
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0
-//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %15, i32 0, i256 0
-//CHECK-NEXT:   %17 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %18 = load [0 x i256]*, [0 x i256]** %17, align 8
-//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0
-//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0, i256 0
-//CHECK-NEXT:   %21 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %22 = load [0 x i256]*, [0 x i256]** %21, align 8
-//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %22, i32 0
-//CHECK-NEXT:   %24 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %25 = bitcast i32* %24 to i256*
-//CHECK-NEXT:   %26 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %27 = load [0 x i256]*, [0 x i256]** %26, align 8
-//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %27, i32 0
-//CHECK-NEXT:   %29 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %30 = bitcast i32* %29 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %10, i256* %11, i256* %12, i256* %16, i256* %20, [0 x i256]* %23, i256* %25, [0 x i256]* %28, i256* %30)
-//CHECK-NEXT:   %31 = bitcast [3 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %32 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %33 = load [0 x i256]*, [0 x i256]** %32, align 8
-//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %33, i32 0
-//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0, i256 1
-//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %38 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %39 = load [0 x i256]*, [0 x i256]** %38, align 8
-//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0
-//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %40, i32 0, i256 0
-//CHECK-NEXT:   %42 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %43 = load [0 x i256]*, [0 x i256]** %42, align 8
-//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %43, i32 0
-//CHECK-NEXT:   %45 = getelementptr [0 x i256], [0 x i256]* %44, i32 0, i256 0
-//CHECK-NEXT:   %46 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %47 = load [0 x i256]*, [0 x i256]** %46, align 8
-//CHECK-NEXT:   %48 = getelementptr [0 x i256], [0 x i256]* %47, i32 0
-//CHECK-NEXT:   %49 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %50 = bitcast i32* %49 to i256*
-//CHECK-NEXT:   %51 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %52 = load [0 x i256]*, [0 x i256]** %51, align 8
-//CHECK-NEXT:   %53 = getelementptr [0 x i256], [0 x i256]* %52, i32 0
-//CHECK-NEXT:   %54 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %55 = bitcast i32* %54 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %31, [0 x i256]* %0, i256* %35, i256* %36, i256* %37, i256* %41, i256* %45, [0 x i256]* %48, i256* %50, [0 x i256]* %53, i256* %55)
+//CHECK-NEXT:   %[[T06:[0-9]+]] = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T07:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T08:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T07]], align 8
+//CHECK-NEXT:   %[[T09:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T08]], i32 0
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T09]], i32 0, i256 1
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T13:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T14:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T13]], align 8
+//CHECK-NEXT:   %[[T15:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T14]], i32 0
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T15]], i32 0, i256 0
+//CHECK-NEXT:   %[[T17:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T18:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T17]], align 8
+//CHECK-NEXT:   %[[T19:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T18]], i32 0
+//CHECK-NEXT:   %[[T20:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T19]], i32 0, i256 0
+//CHECK-NEXT:   %[[T21:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T22:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T21]], align 8
+//CHECK-NEXT:   %[[T23:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T22]], i32 0
+//CHECK-NEXT:   %[[T24:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T25:[0-9]+]] = bitcast i32* %[[T24]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T06]], [0 x i256]* %0, i256* %[[T10]], i256* %[[T11]], i256* %[[T12]], i256* %[[T16]], i256* %[[T20]], [0 x i256]* %[[T23]], i256* %[[T25]])
+//CHECK-NEXT:   %[[T26:[0-9]+]] = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T27:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T28:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T27]], align 8
+//CHECK-NEXT:   %[[T29:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T28]], i32 0
+//CHECK-NEXT:   %[[T30:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T29]], i32 0, i256 1
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T33:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T34:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T33]], align 8
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T34]], i32 0
+//CHECK-NEXT:   %[[T36:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T35]], i32 0, i256 0
+//CHECK-NEXT:   %[[T37:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T38:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T37]], align 8
+//CHECK-NEXT:   %[[T39:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T38]], i32 0
+//CHECK-NEXT:   %[[T40:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T39]], i32 0, i256 0
+//CHECK-NEXT:   %[[T41:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T42:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T41]], align 8
+//CHECK-NEXT:   %[[T43:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T42]], i32 0
+//CHECK-NEXT:   %[[T44:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T45:[0-9]+]] = bitcast i32* %[[T44]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T26]], [0 x i256]* %0, i256* %[[T30]], i256* %[[T31]], i256* %[[T32]], i256* %[[T36]], i256* %[[T40]], [0 x i256]* %[[T43]], i256* %[[T45]])
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/subcmps/subcmps0C.circom b/circom/tests/subcmps/subcmps0C.circom
index ae2ad2597..c62cdff92 100644
--- a/circom/tests/subcmps/subcmps0C.circom
+++ b/circom/tests/subcmps/subcmps0C.circom
@@ -24,8 +24,8 @@ template SubCmps0C(n) {
 component main = SubCmps0C(2);
 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
-//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X1]], i256* %subc_[[X1]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
@@ -37,8 +37,8 @@ component main = SubCmps0C(2);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X1]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X1]])
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
@@ -95,67 +95,57 @@ component main = SubCmps0C(2);
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
-//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
-//CHECK-NEXT:   store i256 2, i256* %1, align 4
+//CHECK-NEXT:   %[[T01:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %[[T01]], align 4
 //CHECK-NEXT:   br label %create_cmp2
 //CHECK-EMPTY: 
 //CHECK-NEXT: create_cmp2:
-//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
-//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %2)
-//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
-//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %3)
+//CHECK-NEXT:   %[[T02:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T02]])
+//CHECK-NEXT:   %[[T03:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T03]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   %[[T04:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %[[T04]], align 4
 //CHECK-NEXT:   br label %unrolled_loop4
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop4:
-//CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %6 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %7 = load [0 x i256]*, [0 x i256]** %6, align 8
-//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %7, i32 0
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %12 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
-//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 0
-//CHECK-NEXT:   %16 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %17 = load [0 x i256]*, [0 x i256]** %16, align 8
-//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0
-//CHECK-NEXT:   %19 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %20 = bitcast i32* %19 to i256*
-//CHECK-NEXT:   %21 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %22 = load [0 x i256]*, [0 x i256]** %21, align 8
-//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %22, i32 0
-//CHECK-NEXT:   %24 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %25 = bitcast i32* %24 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0, i256* %9, i256* %10, i256* %11, i256* %15, [0 x i256]* %18, i256* %20, [0 x i256]* %23, i256* %25)
-//CHECK-NEXT:   %26 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %27 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
-//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
-//CHECK-NEXT:   %30 = getelementptr [0 x i256], [0 x i256]* %29, i32 0, i256 1
-//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %33 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %34 = load [0 x i256]*, [0 x i256]** %33, align 8
-//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0
-//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 0
-//CHECK-NEXT:   %37 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %38 = load [0 x i256]*, [0 x i256]** %37, align 8
-//CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0
-//CHECK-NEXT:   %40 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %41 = bitcast i32* %40 to i256*
-//CHECK-NEXT:   %42 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %43 = load [0 x i256]*, [0 x i256]** %42, align 8
-//CHECK-NEXT:   %44 = getelementptr [0 x i256], [0 x i256]* %43, i32 0
-//CHECK-NEXT:   %45 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %46 = bitcast i32* %45 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %26, [0 x i256]* %0, i256* %30, i256* %31, i256* %32, i256* %36, [0 x i256]* %39, i256* %41, [0 x i256]* %44, i256* %46)
+//CHECK-NEXT:   %[[T05:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T06:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T07:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T06]], align 8
+//CHECK-NEXT:   %[[T08:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T07]], i32 0
+//CHECK-NEXT:   %[[T09:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T08]], i32 0, i256 1
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T13:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T12]], align 8
+//CHECK-NEXT:   %[[T14:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T13]], i32 0
+//CHECK-NEXT:   %[[T15:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T14]], i32 0, i256 0
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T17:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T16]], align 8
+//CHECK-NEXT:   %[[T18:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T17]], i32 0
+//CHECK-NEXT:   %[[T19:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T20:[0-9]+]] = bitcast i32* %[[T19]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T05]], [0 x i256]* %0, i256* %[[T09]], i256* %[[T10]], i256* %[[T11]], i256* %[[T15]], [0 x i256]* %[[T18]], i256* %[[T20]])
+//CHECK-NEXT:   %[[T21:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T22:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T23:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T22]], align 8
+//CHECK-NEXT:   %[[T24:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T23]], i32 0
+//CHECK-NEXT:   %[[T25:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T24]], i32 0, i256 1
+//CHECK-NEXT:   %[[T26:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T27:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T28:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T29:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T28]], align 8
+//CHECK-NEXT:   %[[T30:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T29]], i32 0
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T30]], i32 0, i256 0
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T33:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T32]], align 8
+//CHECK-NEXT:   %[[T34:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T33]], i32 0
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T36:[0-9]+]] = bitcast i32* %[[T35]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T21]], [0 x i256]* %0, i256* %[[T25]], i256* %[[T26]], i256* %[[T27]], i256* %[[T31]], [0 x i256]* %[[T34]], i256* %[[T36]])
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circom/tests/subcmps/subcmps0D.circom b/circom/tests/subcmps/subcmps0D.circom
index 5727c4449..c2eeee5f5 100644
--- a/circom/tests/subcmps/subcmps0D.circom
+++ b/circom/tests/subcmps/subcmps0D.circom
@@ -25,39 +25,39 @@ template SubCmps0D(n) {
 component main = SubCmps0D(3);
 
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X0:[0-9]+]], i256* %fix_[[X1:[0-9]+]], i256* %subfix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
-//CHECK-SAME: i256* %fix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X2]], i256* %subc_[[X2]], [0 x i256]* %sub_[[X5]], i256* %subc_[[X5]]){{.*}} {
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %subfix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X4:[0-9]+]], i256* %fix_[[X5:[0-9]+]], i256* %subfix_[[X6:[0-9]+]], [0 x i256]* %sub_[[X6]], i256* %subc_[[X6]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X0]], i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X2]], i32 0
-//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X2]])
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
-//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X4]], i32 0
 //CHECK-NEXT:   %5 = load i256, i256* %4, align 4
-//CHECK-NEXT:   %6 = getelementptr i256, i256* %subfix_[[X2]], i32 0
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %subfix_[[X3]], i32 0
 //CHECK-NEXT:   store i256 %5, i256* %6, align 4
 //CHECK-NEXT:   br label %store4
 //CHECK-EMPTY: 
 //CHECK-NEXT: store4:
-//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %sub_[[X2]], i32 0
-//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X2]])
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
 //CHECK-NEXT:   br label %store5
 //CHECK-EMPTY: 
 //CHECK-NEXT: store5:
-//CHECK-NEXT:   %8 = getelementptr i256, i256* %subfix_[[X5]], i32 0
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %subfix_[[X6]], i32 0
 //CHECK-NEXT:   %9 = load i256, i256* %8, align 4
-//CHECK-NEXT:   %10 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %10 = getelementptr i256, i256* %fix_[[X5]], i32 0
 //CHECK-NEXT:   store i256 %9, i256* %10, align 4
 //CHECK-NEXT:   br label %store6
 //CHECK-EMPTY: 
@@ -100,106 +100,91 @@ component main = SubCmps0D(3);
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
-//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
-//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   %[[T01:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %[[T01]], align 4
 //CHECK-NEXT:   br label %create_cmp2
 //CHECK-EMPTY: 
 //CHECK-NEXT: create_cmp2:
-//CHECK-NEXT:   %2 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
-//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %2)
-//CHECK-NEXT:   %3 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
-//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %3)
-//CHECK-NEXT:   %4 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2
-//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   %[[T02:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %[[T02]])
+//CHECK-NEXT:   %[[T03:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %[[T03]])
+//CHECK-NEXT:   %[[T04:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %[[T04]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
-//CHECK-NEXT:   %5 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   %[[T05:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %[[T05]], align 4
 //CHECK-NEXT:   br label %unrolled_loop4
 //CHECK-EMPTY: 
 //CHECK-NEXT: unrolled_loop4:
-//CHECK-NEXT:   %6 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %7 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
-//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 1
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %12 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %13 = load [0 x i256]*, [0 x i256]** %12, align 8
-//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0
-//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 2
-//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %18 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %19 = load [0 x i256]*, [0 x i256]** %18, align 8
-//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0
-//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %20, i32 0, i256 0
-//CHECK-NEXT:   %22 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %23 = load [0 x i256]*, [0 x i256]** %22, align 8
-//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0
-//CHECK-NEXT:   %25 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %26 = bitcast i32* %25 to i256*
-//CHECK-NEXT:   %27 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %28 = load [0 x i256]*, [0 x i256]** %27, align 8
-//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0
-//CHECK-NEXT:   %30 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %31 = bitcast i32* %30 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %10, i256* %11, i256* %15, i256* %16, i256* %17, i256* %21, [0 x i256]* %24, i256* %26, [0 x i256]* %29, i256* %31)
-//CHECK-NEXT:   %32 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %33 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %34 = load [0 x i256]*, [0 x i256]** %33, align 8
-//CHECK-NEXT:   %35 = getelementptr [0 x i256], [0 x i256]* %34, i32 0
-//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 1
-//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
-//CHECK-NEXT:   %38 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %39 = load [0 x i256]*, [0 x i256]** %38, align 8
-//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0
-//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %40, i32 0, i256 2
-//CHECK-NEXT:   %42 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
-//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %44 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %45 = load [0 x i256]*, [0 x i256]** %44, align 8
-//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %45, i32 0
-//CHECK-NEXT:   %47 = getelementptr [0 x i256], [0 x i256]* %46, i32 0, i256 0
-//CHECK-NEXT:   %48 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %49 = load [0 x i256]*, [0 x i256]** %48, align 8
-//CHECK-NEXT:   %50 = getelementptr [0 x i256], [0 x i256]* %49, i32 0
-//CHECK-NEXT:   %51 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %52 = bitcast i32* %51 to i256*
-//CHECK-NEXT:   %53 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %54 = load [0 x i256]*, [0 x i256]** %53, align 8
-//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %54, i32 0
-//CHECK-NEXT:   %56 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %57 = bitcast i32* %56 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %32, [0 x i256]* %0, i256* %36, i256* %37, i256* %41, i256* %42, i256* %43, i256* %47, [0 x i256]* %50, i256* %52, [0 x i256]* %55, i256* %57)
-//CHECK-NEXT:   %58 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %59 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %60 = load [0 x i256]*, [0 x i256]** %59, align 8
-//CHECK-NEXT:   %61 = getelementptr [0 x i256], [0 x i256]* %60, i32 0
-//CHECK-NEXT:   %62 = getelementptr [0 x i256], [0 x i256]* %61, i32 0, i256 1
-//CHECK-NEXT:   %63 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
-//CHECK-NEXT:   %64 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %65 = load [0 x i256]*, [0 x i256]** %64, align 8
-//CHECK-NEXT:   %66 = getelementptr [0 x i256], [0 x i256]* %65, i32 0
-//CHECK-NEXT:   %67 = getelementptr [0 x i256], [0 x i256]* %66, i32 0, i256 2
-//CHECK-NEXT:   %68 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
-//CHECK-NEXT:   %69 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %70 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %71 = load [0 x i256]*, [0 x i256]** %70, align 8
-//CHECK-NEXT:   %72 = getelementptr [0 x i256], [0 x i256]* %71, i32 0
-//CHECK-NEXT:   %73 = getelementptr [0 x i256], [0 x i256]* %72, i32 0, i256 0
-//CHECK-NEXT:   %74 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %75 = load [0 x i256]*, [0 x i256]** %74, align 8
-//CHECK-NEXT:   %76 = getelementptr [0 x i256], [0 x i256]* %75, i32 0
-//CHECK-NEXT:   %77 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
-//CHECK-NEXT:   %78 = bitcast i32* %77 to i256*
-//CHECK-NEXT:   %79 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %80 = load [0 x i256]*, [0 x i256]** %79, align 8
-//CHECK-NEXT:   %81 = getelementptr [0 x i256], [0 x i256]* %80, i32 0
-//CHECK-NEXT:   %82 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
-//CHECK-NEXT:   %83 = bitcast i32* %82 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %58, [0 x i256]* %0, i256* %62, i256* %63, i256* %67, i256* %68, i256* %69, i256* %73, [0 x i256]* %76, i256* %78, [0 x i256]* %81, i256* %83)
+//CHECK-NEXT:   %[[T06:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T07:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T08:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T07]], align 8
+//CHECK-NEXT:   %[[T09:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T08]], i32 0
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T09]], i32 0, i256 1
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T13:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T12]], align 8
+//CHECK-NEXT:   %[[T14:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T13]], i32 0
+//CHECK-NEXT:   %[[T15:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T14]], i32 0, i256 2
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T17:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T18:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T19:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T18]], align 8
+//CHECK-NEXT:   %[[T20:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T19]], i32 0
+//CHECK-NEXT:   %[[T21:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T20]], i32 0, i256 0
+//CHECK-NEXT:   %[[T22:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T23:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T22]], align 8
+//CHECK-NEXT:   %[[T24:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T23]], i32 0
+//CHECK-NEXT:   %[[T25:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T26:[0-9]+]] = bitcast i32* %[[T25]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T06]], [0 x i256]* %0, i256* %[[T10]], i256* %[[T11]], i256* %[[T15]], i256* %[[T16]], i256* %[[T17]], i256* %[[T21]], [0 x i256]* %[[T24]], i256* %[[T26]])
+//CHECK-NEXT:   %[[T27:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T28:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T29:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T28]], align 8
+//CHECK-NEXT:   %[[T30:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T29]], i32 0
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T30]], i32 0, i256 1
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %[[T33:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T34:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T33]], align 8
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T34]], i32 0
+//CHECK-NEXT:   %[[T36:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T35]], i32 0, i256 2
+//CHECK-NEXT:   %[[T37:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %[[T38:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T39:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T40:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T39]], align 8
+//CHECK-NEXT:   %[[T41:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T40]], i32 0
+//CHECK-NEXT:   %[[T42:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T41]], i32 0, i256 0
+//CHECK-NEXT:   %[[T43:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T44:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T43]], align 8
+//CHECK-NEXT:   %[[T45:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T44]], i32 0
+//CHECK-NEXT:   %[[T46:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T47:[0-9]+]] = bitcast i32* %[[T46]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T27]], [0 x i256]* %0, i256* %[[T31]], i256* %[[T32]], i256* %[[T36]], i256* %[[T37]], i256* %[[T38]], i256* %[[T42]], [0 x i256]* %[[T45]], i256* %[[T47]])
+//CHECK-NEXT:   %[[T48:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T49:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T50:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T49]], align 8
+//CHECK-NEXT:   %[[T51:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T50]], i32 0
+//CHECK-NEXT:   %[[T52:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T51]], i32 0, i256 1
+//CHECK-NEXT:   %[[T53:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %[[T54:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T55:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T54]], align 8
+//CHECK-NEXT:   %[[T56:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T55]], i32 0
+//CHECK-NEXT:   %[[T57:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T56]], i32 0, i256 2
+//CHECK-NEXT:   %[[T58:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %[[T59:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T60:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T61:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T60]], align 8
+//CHECK-NEXT:   %[[T62:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T61]], i32 0
+//CHECK-NEXT:   %[[T63:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T62]], i32 0, i256 0
+//CHECK-NEXT:   %[[T64:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T65:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T64]], align 8
+//CHECK-NEXT:   %[[T66:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T65]], i32 0
+//CHECK-NEXT:   %[[T67:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %[[T68:[0-9]+]] = bitcast i32* %[[T67]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T48]], [0 x i256]* %0, i256* %[[T52]], i256* %[[T53]], i256* %[[T57]], i256* %[[T58]], i256* %[[T59]], i256* %[[T63]], [0 x i256]* %[[T66]], i256* %[[T68]])
 //CHECK-NEXT:   br label %prologue
 //CHECK-EMPTY: 
 //CHECK-NEXT: prologue:
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index 741333fbf..da775ebc7 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -1,6 +1,7 @@
 use std::cell::{RefCell, Ref};
-use std::collections::{BTreeMap, HashMap, HashSet};
+use std::collections::{BTreeMap, HashMap};
 use std::vec;
+use indexmap::{IndexMap, IndexSet};
 use code_producers::llvm_elements::fr::*;
 use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
 use compiler::hir::very_concrete_program::Param;
@@ -8,7 +9,6 @@ use compiler::intermediate_representation::{
     BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
 };
 use compiler::intermediate_representation::ir_interface::*;
-use indexmap::IndexSet;
 use crate::bucket_interpreter::value::Value;
 use crate::passes::LOOP_BODY_FN_PREFIX;
 use crate::passes::loop_unroll::extracted_location_updater::ExtractedFunctionLocationUpdater;
@@ -35,15 +35,16 @@ impl ArgIndex {
     }
 }
 
-/// Need this structure to skip id/metadata fields in ValueBucket when using as map key
+/// Need this structure to skip id/metadata fields in ValueBucket when using as map key.
+/// Also, the `is_output` and `input_information` fields are deliberately left out: the extra
+/// arguments that are added based on this are only used to trigger generation of the run
+/// function after all of the inputs have been assigned.
 #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
 struct SubcmpSignalHashFix {
     cmp_address_parse_as: ValueType,
     cmp_address_op_aux_no: usize,
     cmp_address_value: usize,
     uniform_parallel_value: Option<bool>,
-    is_output: bool,
-    input_information: InputInformation,
     counter_override: bool,
 }
 
@@ -52,9 +53,8 @@ impl SubcmpSignalHashFix {
         if let AddressType::SubcmpSignal {
             cmp_address,
             uniform_parallel_value,
-            is_output,
-            input_information,
             counter_override,
+            ..
         } = addr
         {
             if let Instruction::Value(ValueBucket { parse_as, op_aux_no, value, .. }) =
@@ -65,8 +65,6 @@ impl SubcmpSignalHashFix {
                     cmp_address_op_aux_no: op_aux_no,
                     cmp_address_value: value,
                     uniform_parallel_value: uniform_parallel_value.clone(),
-                    is_output: is_output.clone(),
-                    input_information: input_information.clone(),
                     counter_override: counter_override.clone(),
                 };
             }
@@ -77,7 +75,7 @@ impl SubcmpSignalHashFix {
 
 struct ExtraArgsResult {
     bucket_to_itr_to_ref: HashMap<BucketId, Vec<Option<(AddressType, AddressOffset)>>>,
-    bucket_to_args: HashMap<BucketId, ArgIndex>,
+    bucket_to_args: IndexMap<BucketId, ArgIndex>,
     num_args: usize,
 }
 
@@ -206,7 +204,7 @@ impl LoopBodyExtractor {
     fn build_new_body(
         &self,
         bucket: &LoopBucket,
-        mut bucket_to_args: HashMap<BucketId, ArgIndex>,
+        mut bucket_to_args: IndexMap<BucketId, ArgIndex>,
         num_args: usize,
     ) -> String {
         // NOTE: must create parameter list before 'bucket_to_args' is modified
@@ -402,7 +400,7 @@ impl LoopBodyExtractor {
         let mut bucket_to_itr_to_ref: HashMap<BucketId, Vec<Option<(AddressType, AddressOffset)>>> =
             HashMap::new();
         //
-        let mut bucket_to_args: HashMap<BucketId, ArgIndex> = HashMap::new();
+        let mut bucket_to_args: IndexMap<BucketId, ArgIndex> = IndexMap::new();
         let vpi = recorder.get_vals_per_iter();
         // NOTE: starts at 2 because the current component's signal arena and lvars are first.
         let mut next_idx: FuncArgIdx = 2;
@@ -441,9 +439,9 @@ impl LoopBodyExtractor {
         //  the current component's signal arena and lvars are).
         // Find groups of BucketId that use the same SubcmpSignal (to reduce number of arguments).
         //  A group must have this same property in all iterations in order to be safe to combine.
-        let mut safe_groups: BTreeMap<SubcmpSignalHashFix, HashSet<BucketId>> = Default::default();
+        let mut safe_groups: BTreeMap<SubcmpSignalHashFix, IndexSet<BucketId>> = Default::default();
         for iter_num in 0..recorder.get_iter() {
-            let grps: BTreeMap<SubcmpSignalHashFix, HashSet<BucketId>> = bucket_to_itr_to_ref
+            let grps: BTreeMap<SubcmpSignalHashFix, IndexSet<BucketId>> = bucket_to_itr_to_ref
                 .iter()
                 .map(|(k, col)| (k, &col[iter_num]))
                 .fold(BTreeMap::new(), |mut r, (b, a)| {
@@ -454,6 +452,7 @@ impl LoopBodyExtractor {
                     }
                     r
                 });
+            // Assume all groups are safe until proven otherwise. If no safe groups remain at any point, just quit.
             if iter_num == 0 {
                 safe_groups = grps;
             } else {
diff --git a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
index d0e599021..1756b410d 100644
--- a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
+++ b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
@@ -1,4 +1,4 @@
-use std::collections::HashMap;
+use indexmap::IndexMap;
 use code_producers::llvm_elements::fr::FR_IDENTITY_ARR_PTR;
 use compiler::intermediate_representation::{BucketId, InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
@@ -17,7 +17,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_load_bucket(
         &mut self,
         bucket: &mut LoadBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
             // Update the location information to reference the argument
@@ -46,7 +46,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_store_bucket(
         &mut self,
         bucket: &mut StoreBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         // Check the source/RHS of the store in either case
         self.check_instruction(&mut bucket.src, bucket_arg_order);
@@ -120,7 +120,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_location_rule(
         &mut self,
         location_rule: &mut LocationRule,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         match location_rule {
             LocationRule::Indexed { location, .. } => {
@@ -133,7 +133,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_address_type(
         &mut self,
         addr_type: &mut AddressType,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         if let AddressType::SubcmpSignal { cmp_address, .. } = addr_type {
             self.check_instruction(cmp_address, bucket_arg_order);
@@ -143,7 +143,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_compute_bucket(
         &mut self,
         bucket: &mut ComputeBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instructions(&mut bucket.stack, bucket_arg_order);
     }
@@ -151,7 +151,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_assert_bucket(
         &mut self,
         bucket: &mut AssertBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instruction(&mut bucket.evaluate, bucket_arg_order);
     }
@@ -159,7 +159,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_loop_bucket(
         &mut self,
         bucket: &mut LoopBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instruction(&mut bucket.continue_condition, bucket_arg_order);
         self.check_instructions(&mut bucket.body, bucket_arg_order);
@@ -168,7 +168,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_create_cmp_bucket(
         &mut self,
         bucket: &mut CreateCmpBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instruction(&mut bucket.sub_cmp_id, bucket_arg_order);
     }
@@ -176,7 +176,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_constraint_bucket(
         &mut self,
         bucket: &mut ConstraintBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instruction(
             match bucket {
@@ -190,7 +190,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_block_bucket(
         &mut self,
         bucket: &mut BlockBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instructions(&mut bucket.body, bucket_arg_order);
     }
@@ -198,7 +198,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_call_bucket(
         &mut self,
         bucket: &mut CallBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instructions(&mut bucket.arguments, bucket_arg_order);
     }
@@ -206,7 +206,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_branch_bucket(
         &mut self,
         bucket: &mut BranchBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instruction(&mut bucket.cond, bucket_arg_order);
         self.check_instructions(&mut bucket.if_branch, bucket_arg_order);
@@ -216,7 +216,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_return_bucket(
         &mut self,
         bucket: &mut ReturnBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         self.check_instruction(&mut bucket.value, bucket_arg_order);
     }
@@ -224,7 +224,7 @@ impl ExtractedFunctionLocationUpdater {
     fn check_log_bucket(
         &mut self,
         bucket: &mut LogBucket,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         for arg in &mut bucket.argsprint {
             if let LogBucketArg::LogExp(i) = arg {
@@ -234,13 +234,13 @@ impl ExtractedFunctionLocationUpdater {
     }
 
     //Nothing to do
-    fn check_value_bucket(&mut self, _: &mut ValueBucket, _: &mut HashMap<BucketId, ArgIndex>) {}
-    fn check_nop_bucket(&mut self, _: &mut NopBucket, _: &mut HashMap<BucketId, ArgIndex>) {}
+    fn check_value_bucket(&mut self, _: &mut ValueBucket, _: &mut IndexMap<BucketId, ArgIndex>) {}
+    fn check_nop_bucket(&mut self, _: &mut NopBucket, _: &mut IndexMap<BucketId, ArgIndex>) {}
 
     fn check_instructions(
         &mut self,
         insts: &mut Vec<InstructionPointer>,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         for i in insts {
             self.check_instruction(i, bucket_arg_order);
@@ -250,7 +250,7 @@ impl ExtractedFunctionLocationUpdater {
     pub fn check_instruction(
         &mut self,
         inst: &mut InstructionPointer,
-        bucket_arg_order: &mut HashMap<BucketId, ArgIndex>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
     ) {
         match inst.as_mut() {
             Instruction::Value(ref mut b) => self.check_value_bucket(b, bucket_arg_order),

From 3715a6a063a85e7bf02e7eea0d6dbfb0d3ee1a66 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 25 Sep 2023 21:55:45 -0500
Subject: [PATCH 35/42] test comments

---
 circom/tests/subcmps/mapped.circom | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index fcf5cda63..69c44a397 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -2,7 +2,8 @@ pragma circom 2.0.0;
 
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
+// XFAIL:.*
+// TODO: I think this test has problems related to both https://veridise.atlassian.net/browse/VAN-582 and https://veridise.atlassian.net/browse/VAN-670
 
 template A(n) {
 	signal input a[n];
@@ -39,4 +40,4 @@ template B(n) {
 	}
 }
 
-component main = B(2);
\ No newline at end of file
+component main = B(2);

From 0ba4952007c28532f96e975729e3afc0624f625c Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Mon, 25 Sep 2023 22:02:40 -0500
Subject: [PATCH 36/42] fix the test I forgot about

---
 circom/tests/subcmps/subcmps1.circom | 133 ++++++++++++---------------
 1 file changed, 59 insertions(+), 74 deletions(-)

diff --git a/circom/tests/subcmps/subcmps1.circom b/circom/tests/subcmps/subcmps1.circom
index 97d947f29..883d3d290 100644
--- a/circom/tests/subcmps/subcmps1.circom
+++ b/circom/tests/subcmps/subcmps1.circom
@@ -35,15 +35,15 @@ component main = SubCmps1(3);
 // %subcmps = [ IsZero[0]{signals=[out,in,inv]}, IsZero[1]{SAME} ]
 //
 //CHECK-LABEL: define void @..generated..loop.body.
-//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %[[X1:subfix_[0-9]+]], i256* %[[X2:fix_[0-9]+]], i256* %[[X3:fix_[0-9]+]], i256* %[[X4:subfix_[0-9]+]],
-//CHECK-SAME: [0 x i256]* %[[X5:sub_[0-9]+]], i256* %[[X6:subc_[0-9]+]], [0 x i256]* %[[X7:sub_[0-9]+]], i256* %[[X8:subc_[0-9]+]]){{.*}} {
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
 //CHECK-NEXT: ..generated..loop.body.[[$F_ID]]:
 //CHECK-NEXT:   br label %store1
 //CHECK-EMPTY: 
 //CHECK-NEXT: store1:
-//CHECK-NEXT:   %0 = getelementptr i256, i256* %[[X2]], i32 0
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
 //CHECK-NEXT:   %1 = load i256, i256* %0, align 4
-//CHECK-NEXT:   %2 = getelementptr i256, i256* %[[X1]], i32 0
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
 //CHECK-NEXT:   store i256 %1, i256* %2, align 4
 //CHECK-NEXT:   %3 = load i256, i256* %2, align 4
 //CHECK-NEXT:   %constraint = alloca i1, align 1
@@ -51,14 +51,14 @@ component main = SubCmps1(3);
 //CHECK-NEXT:   br label %store2
 //CHECK-EMPTY: 
 //CHECK-NEXT: store2:
-//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %[[X5]], i32 0
-//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %[[X5]])
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
 //CHECK-NEXT:   br label %store3
 //CHECK-EMPTY: 
 //CHECK-NEXT: store3:
-//CHECK-NEXT:   %5 = getelementptr i256, i256* %[[X4]], i32 0
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %subfix_[[X4]], i32 0
 //CHECK-NEXT:   %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:   %7 = getelementptr i256, i256* %[[X3]], i32 0
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
 //CHECK-NEXT:   store i256 %6, i256* %7, align 4
 //CHECK-NEXT:   %8 = load i256, i256* %7, align 4
 //CHECK-NEXT:   %constraint1 = alloca i1, align 1
@@ -79,70 +79,55 @@ component main = SubCmps1(3);
 //
 //CHECK-LABEL: define void @SubCmps1_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
 //CHECK:      unrolled_loop5:
-//CHECK-NEXT:   %7 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %8 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %9 = load [0 x i256]*, [0 x i256]** %8, align 8
-//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0
-//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 1
-//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
-//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
-//CHECK-NEXT:   %14 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %15 = load [0 x i256]*, [0 x i256]** %14, align 8
-//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %15, i32 0
-//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %16, i32 0, i256 0
-//CHECK-NEXT:   %18 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %19 = load [0 x i256]*, [0 x i256]** %18, align 8
-//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0
-//CHECK-NEXT:   %21 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %22 = bitcast i32* %21 to i256*
-//CHECK-NEXT:   %23 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
-//CHECK-NEXT:   %24 = load [0 x i256]*, [0 x i256]** %23, align 8
-//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %24, i32 0
-//CHECK-NEXT:   %26 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
-//CHECK-NEXT:   %27 = bitcast i32* %26 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %7, [0 x i256]* %0, i256* %11, i256* %12, i256* %13, i256* %17, [0 x i256]* %20, i256* %22, [0 x i256]* %25, i256* %27)
-//CHECK-NEXT:   %28 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %29 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %30 = load [0 x i256]*, [0 x i256]** %29, align 8
-//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %30, i32 0
-//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %31, i32 0, i256 1
-//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
-//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
-//CHECK-NEXT:   %35 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %36 = load [0 x i256]*, [0 x i256]** %35, align 8
-//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %36, i32 0
-//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %37, i32 0, i256 0
-//CHECK-NEXT:   %39 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %40 = load [0 x i256]*, [0 x i256]** %39, align 8
-//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %40, i32 0
-//CHECK-NEXT:   %42 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %43 = bitcast i32* %42 to i256*
-//CHECK-NEXT:   %44 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
-//CHECK-NEXT:   %45 = load [0 x i256]*, [0 x i256]** %44, align 8
-//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %45, i32 0
-//CHECK-NEXT:   %47 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
-//CHECK-NEXT:   %48 = bitcast i32* %47 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %28, [0 x i256]* %0, i256* %32, i256* %33, i256* %34, i256* %38, [0 x i256]* %41, i256* %43, [0 x i256]* %46, i256* %48)
-//CHECK-NEXT:   %49 = bitcast [2 x i256]* %lvars to [0 x i256]*
-//CHECK-NEXT:   %50 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %51 = load [0 x i256]*, [0 x i256]** %50, align 8
-//CHECK-NEXT:   %52 = getelementptr [0 x i256], [0 x i256]* %51, i32 0
-//CHECK-NEXT:   %53 = getelementptr [0 x i256], [0 x i256]* %52, i32 0, i256 1
-//CHECK-NEXT:   %54 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
-//CHECK-NEXT:   %55 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
-//CHECK-NEXT:   %56 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %57 = load [0 x i256]*, [0 x i256]** %56, align 8
-//CHECK-NEXT:   %58 = getelementptr [0 x i256], [0 x i256]* %57, i32 0
-//CHECK-NEXT:   %59 = getelementptr [0 x i256], [0 x i256]* %58, i32 0, i256 0
-//CHECK-NEXT:   %60 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %61 = load [0 x i256]*, [0 x i256]** %60, align 8
-//CHECK-NEXT:   %62 = getelementptr [0 x i256], [0 x i256]* %61, i32 0
-//CHECK-NEXT:   %63 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
-//CHECK-NEXT:   %64 = bitcast i32* %63 to i256*
-//CHECK-NEXT:   %65 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
-//CHECK-NEXT:   %66 = load [0 x i256]*, [0 x i256]** %65, align 8
-//CHECK-NEXT:   %67 = getelementptr [0 x i256], [0 x i256]* %66, i32 0
-//CHECK-NEXT:   %68 = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
-//CHECK-NEXT:   %69 = bitcast i32* %68 to i256*
-//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %49, [0 x i256]* %0, i256* %53, i256* %54, i256* %55, i256* %59, [0 x i256]* %62, i256* %64, [0 x i256]* %67, i256* %69)
+//CHECK-NEXT:   %[[T07:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T08:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T09:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T08]], align 8
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T09]], i32 0
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T10]], i32 0, i256 1
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T13:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T14:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T15:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T14]], align 8
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T15]], i32 0
+//CHECK-NEXT:   %[[T17:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T16]], i32 0, i256 0
+//CHECK-NEXT:   %[[T18:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T19:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T18]], align 8
+//CHECK-NEXT:   %[[T20:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T19]], i32 0
+//CHECK-NEXT:   %[[T21:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T22:[0-9]+]] = bitcast i32* %[[T21]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %[[T07]], [0 x i256]* %0, i256* %[[T11]], i256* %[[T12]], i256* %[[T13]], i256* %[[T17]], [0 x i256]* %[[T20]], i256* %[[T22]])
+//CHECK-NEXT:   %[[T28:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T29:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T30:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T29]], align 8
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T30]], i32 0
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T31]], i32 0, i256 1
+//CHECK-NEXT:   %[[T33:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %[[T34:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T36:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T35]], align 8
+//CHECK-NEXT:   %[[T37:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T36]], i32 0
+//CHECK-NEXT:   %[[T38:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T37]], i32 0, i256 0
+//CHECK-NEXT:   %[[T39:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T40:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T39]], align 8
+//CHECK-NEXT:   %[[T41:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T40]], i32 0
+//CHECK-NEXT:   %[[T42:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T43:[0-9]+]] = bitcast i32* %[[T42]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %[[T28]], [0 x i256]* %0, i256* %[[T32]], i256* %[[T33]], i256* %[[T34]], i256* %[[T38]], [0 x i256]* %[[T41]], i256* %[[T43]])
+//CHECK-NEXT:   %[[T49:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T50:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T51:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T50]], align 8
+//CHECK-NEXT:   %[[T52:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T51]], i32 0
+//CHECK-NEXT:   %[[T53:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T52]], i32 0, i256 1
+//CHECK-NEXT:   %[[T54:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %[[T55:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T56:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T57:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T56]], align 8
+//CHECK-NEXT:   %[[T58:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T57]], i32 0
+//CHECK-NEXT:   %[[T59:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T58]], i32 0, i256 0
+//CHECK-NEXT:   %[[T60:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T61:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T60]], align 8
+//CHECK-NEXT:   %[[T62:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T61]], i32 0
+//CHECK-NEXT:   %[[T63:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %[[T64:[0-9]+]] = bitcast i32* %[[T63]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %[[T49]], [0 x i256]* %0, i256* %[[T53]], i256* %[[T54]], i256* %[[T55]], i256* %[[T59]], [0 x i256]* %[[T62]], i256* %[[T64]])
 //CHECK-NEXT:   br label %prologue

From bd55476c132b8dd85512e9396e81669cdbeb53b3 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 26 Sep 2023 10:51:55 -0500
Subject: [PATCH 37/42] comment out debug lines

---
 .../env/extracted_func_env.rs                 | 28 ++++++++---------
 circuit_passes/src/bucket_interpreter/mod.rs  |  8 ++---
 circuit_passes/src/passes/loop_unroll/mod.rs  | 31 +++++++++++--------
 3 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index c8d1a3b63..4ee2ed48a 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -85,7 +85,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        println!("[FINDME] get_subcmp_signal({subcmp_idx}, {signal_idx}) = {} in {}", res, self);
+        // println!("[FINDME] get_subcmp_signal({subcmp_idx}, {signal_idx}) = {} in {}", res, self);
         res
     }
 
@@ -166,7 +166,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        println!("[FINDME] subcmp_counter_is_zero({subcmp_idx}) = {} in {}", res, self);
+        // println!("[FINDME] subcmp_counter_is_zero({subcmp_idx}) = {} in {}", res, self);
         res
     }
 
@@ -190,7 +190,7 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        println!("[FINDME] subcmp_counter_equal_to({subcmp_idx}, {value}) = {} in {}", res, self);
+        // println!("[FINDME] subcmp_counter_equal_to({subcmp_idx}, {value}) = {} in {}", res, self);
         res
     }
 
@@ -226,8 +226,8 @@ impl<'a> ExtractedFuncEnvData<'a> {
     }
 
     pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
-        let temp_str_self = format!("{}", self);
-        let temp_str_value = format!("{}", value);
+        // let temp_str_self = format!("{}", self);
+        // let temp_str_value = format!("{}", value);
         //NOTE: This is only called by BucketInterpreter::store_value_in_address.
         //Use the map from loop unrolling to convert the SubcmpSignal reference back
         //  into the proper reference (reversing ExtractedFunctionLocationUpdater).
@@ -257,15 +257,15 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        println!(
-            "[FINDME] set_subcmp_signal({subcmp_idx}, {signal_idx}, {})\n BEFORE: {}\n AFTER: {}",
-            temp_str_value, temp_str_self, new_env
-        );
+        // println!(
+        //     "[FINDME] set_subcmp_signal({subcmp_idx}, {signal_idx}, {})\n BEFORE: {}\n AFTER: {}",
+        //     temp_str_value, temp_str_self, new_env
+        // );
         ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap }
     }
 
     pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
-        let temp_str_self = format!("{}", self);
+        // let temp_str_self = format!("{}", self);
         let new_env = match self.remap.get(&subcmp_idx).cloned() {
             //NOTE: The ArgIndex::SubCmp 'arena' and 'counter' parameters were not added
             //  to the 'remap' (producing None result here) because those parameters are
@@ -289,10 +289,10 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        println!(
-            "[FINDME] decrease_subcmp_counter({subcmp_idx})\n BEFORE: {}\n AFTER: {}",
-            temp_str_self, new_env
-        );
+        // println!(
+        //     "[FINDME] decrease_subcmp_counter({subcmp_idx})\n BEFORE: {}\n AFTER: {}",
+        //     temp_str_self, new_env
+        // );
         ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap }
     }
 
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 2192603cf..320c0cbff 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -318,10 +318,10 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
                 }
             }
             AddressType::SubcmpSignal { cmp_address, input_information, .. } => {
-                println!(
-                    "cmp_address = {:?}, input_information = {:?}",
-                    cmp_address, input_information
-                );
+                // println!(
+                //     "cmp_address = {:?}, input_information = {:?}",
+                //     cmp_address, input_information
+                // );
                 let (addr, env) = self.execute_instruction(cmp_address, env, observe);
                 let addr = addr
                     .expect(
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index cff56cc1a..8c54926d2 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -8,7 +8,7 @@ use std::vec;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{
-    BucketId, InstructionList, InstructionPointer, new_id, UpdateId, ToSExp,
+    BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
 };
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
@@ -52,16 +52,21 @@ impl<'d> LoopUnrollPass<'d> {
     }
 
     fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
-        {
-            println!("\nTry unrolling loop {}:", bucket.id); //TODO: TEMP
-            for (i, s) in bucket.body.iter().enumerate() {
-                println!("[{}/{}]{}", i + 1, bucket.body.len(), s.to_sexp().to_pretty(100));
-            }
-            for (i, s) in bucket.body.iter().enumerate() {
-                println!("[{}/{}]{:?}", i + 1, bucket.body.len(), s);
-            }
-            println!("LOOP ENTRY env {}", env); //TODO: TEMP
-        }
+        // {
+        //     println!("\nTry unrolling loop {}:", bucket.id); //TODO: TEMP
+        //     for (i, s) in bucket.body.iter().enumerate() {
+        //         println!(
+        //             "[{}/{}]{}",
+        //             i + 1,
+        //             bucket.body.len(),
+        //             compiler::intermediate_representation::ToSExp::to_sexp(&**s).to_pretty(100)
+        //         );
+        //     }
+        //     for (i, s) in bucket.body.iter().enumerate() {
+        //         println!("[{}/{}]{:?}", i + 1, bucket.body.len(), s);
+        //     }
+        //     println!("LOOP ENTRY env {}", env); //TODO: TEMP
+        // }
         // Compute loop iteration count. If unknown, return immediately.
         let recorder = EnvRecorder::new(self.global_data, &self.memory);
         {
@@ -84,7 +89,7 @@ impl<'d> LoopUnrollPass<'d> {
                 inner_env = new_env;
             }
         }
-        println!("recorder = {:?}", recorder); //TODO: TEMP
+        // println!("recorder = {:?}", recorder); //TODO: TEMP
 
         let mut block_body = vec![];
         if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() {
@@ -119,7 +124,7 @@ impl<'d> LoopUnrollPass<'d> {
     // Will take the unrolled loop and interpretate it
     // checking if new loop buckets appear
     fn continue_inside(&self, bucket: &BlockBucket, env: &Env) {
-        println!("\ncontinue_inside {:?} with {} ", bucket, env);
+        // println!("\ncontinue_inside {:?} with {} ", bucket, env);
         let interpreter = self.memory.build_interpreter(self.global_data, self);
         let env = Env::new_unroll_block_env(env.clone(), &self.extractor);
         interpreter.execute_block_bucket(bucket, env, true);

From d86064029b49dd4893a4768b3b76eeaae007dcf1 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Tue, 26 Sep 2023 12:28:27 -0500
Subject: [PATCH 38/42] fix an unhandled case

---
 .../tests/loops/inner_conditional_10.circom   | 93 +++++++++++++++++++
 .../passes/loop_unroll/loop_env_recorder.rs   |  6 +-
 2 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 circom/tests/loops/inner_conditional_10.circom

diff --git a/circom/tests/loops/inner_conditional_10.circom b/circom/tests/loops/inner_conditional_10.circom
new file mode 100644
index 000000000..066ad245c
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_10.circom
@@ -0,0 +1,93 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Sigma() {
+    signal input inp;
+    signal output out;
+}
+
+template Poseidon() {
+    signal input inp;
+
+    component sigmaF[2];
+
+    for (var i=0; i<4; i++) {
+        if (i < 1 || i >= 3) {
+            var k = i < 1 ? 0 : 1;
+            sigmaF[k] = Sigma();
+            sigmaF[k].inp <== inp;
+        }
+    }
+}
+
+component main = Poseidon();
+
+//CHECK-LABEL: define void @Poseidon_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Sigma_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @Sigma_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %4 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 1
+//CHECK-NEXT:   store i256 %6, i256* %9, align 4
+//CHECK-NEXT:   %10 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter = load i32, i32* %10, align 4
+//CHECK-NEXT:   %decrement.counter = sub i32 %load.subcmp.counter, 1
+//CHECK-NEXT:   store i32 %decrement.counter, i32* %10, align 4
+//CHECK-NEXT:   %11 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %12 = load [0 x i256]*, [0 x i256]** %11, align 8
+//CHECK-NEXT:   call void @Sigma_0_run([0 x i256]* %12)
+//CHECK-NEXT:   %13 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %6, i256 %13, i1* %constraint)
+//CHECK-NEXT:   %14 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 1, i256* %14, align 4
+//CHECK-NEXT:   %15 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %15, align 4
+//CHECK-NEXT:   %16 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %16, align 4
+//CHECK-NEXT:   %17 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %17, align 4
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   %19 = load i256, i256* %18, align 4
+//CHECK-NEXT:   %20 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %21 = load [0 x i256]*, [0 x i256]** %20, align 8
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %21, i32 0, i32 1
+//CHECK-NEXT:   store i256 %19, i256* %22, align 4
+//CHECK-NEXT:   %23 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter1 = load i32, i32* %23, align 4
+//CHECK-NEXT:   %decrement.counter2 = sub i32 %load.subcmp.counter1, 1
+//CHECK-NEXT:   store i32 %decrement.counter2, i32* %23, align 4
+//CHECK-NEXT:   %24 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %25 = load [0 x i256]*, [0 x i256]** %24, align 8
+//CHECK-NEXT:   call void @Sigma_0_run([0 x i256]* %25)
+//CHECK-NEXT:   %26 = load i256, i256* %22, align 4
+//CHECK-NEXT:   %constraint3 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %19, i256 %26, i1* %constraint3)
+//CHECK-NEXT:   %27 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %27, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 3183503b8..8462e6dba 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -2,7 +2,7 @@ use std::cell::{RefCell, Ref};
 use std::collections::BTreeMap;
 use std::fmt::{Debug, Formatter};
 use indexmap::IndexMap;
-use compiler::intermediate_representation::BucketId;
+use compiler::intermediate_representation::{BucketId, new_id};
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
@@ -172,8 +172,10 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
                     cmp_address: {
                         if addr_result == Value::Unknown {
                             self.safe_to_move.replace(false);
+                            NopBucket { id: new_id() }.allocate()
+                        } else {
+                            addr_result.to_value_bucket(self.mem).allocate()
                         }
-                        addr_result.to_value_bucket(self.mem).allocate()
                     },
                     uniform_parallel_value: uniform_parallel_value.clone(),
                     is_output: *is_output,

From e83051f7ffa281b1971e7688e5ec43c0d0b8106f Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 27 Sep 2023 11:45:08 -0500
Subject: [PATCH 39/42] cleanup a bit

---
 .../src/passes/loop_unroll/body_extractor.rs         | 12 ++++++++----
 .../src/passes/loop_unroll/loop_env_recorder.rs      |  4 ++--
 circuit_passes/src/passes/loop_unroll/mod.rs         |  2 --
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index da775ebc7..6e69ba71d 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -134,7 +134,7 @@ impl LoopBodyExtractor {
             //  the "loading" (but really it just returns the pointer that was passed in).
             let mut args = Self::new_filled_vec(
                 extra_arg_info.num_args,
-                Box::new(Instruction::Nop(NopBucket { id: 0 })),
+                NopBucket { id: 0 }.allocate(), // garbage fill
             );
             // Parameter for local vars
             args[0] = Self::new_storage_ptr_ref(bucket, AddressType::Variable);
@@ -384,8 +384,12 @@ impl LoopBodyExtractor {
         .0
     }
 
-    // Key for the returned map is iteration number.
-    // The HashMap that is returned maps bucket to fixed* argument index.
+    /// The ideal scenario for extracting the loop body into a new function is to need
+    /// only 2 function arguments: lvars and signals. However, we want to avoid variable
+    /// indexing within the extracted function, so we include extra pointer arguments
+    /// that allow the indexing to happen in the original body, where the loop will be
+    /// unrolled and the indices will become known constant values. This computes the
+    /// extra arguments that will be needed.
     fn compute_extra_args<'a>(recorder: &'a EnvRecorder<'a, '_>) -> ExtraArgsResult {
         // Table structure indexed first by load/store BucketId, then by iteration number.
         //  View the first (BucketId) as columns and the second (iteration number) as rows.
@@ -452,7 +456,7 @@ impl LoopBodyExtractor {
                     }
                     r
                 });
-            // Assume all groups are safe until proven otherwise. So if there are none at any point, just quit.
+            // Assume all groups are safe until proven otherwise. So if it's empty at any point, just quit.
             if iter_num == 0 {
                 safe_groups = grps;
             } else {
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 8462e6dba..962468cc2 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -2,7 +2,7 @@ use std::cell::{RefCell, Ref};
 use std::collections::BTreeMap;
 use std::fmt::{Debug, Formatter};
 use indexmap::IndexMap;
-use compiler::intermediate_representation::{BucketId, new_id};
+use compiler::intermediate_representation::BucketId;
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
 use crate::bucket_interpreter::memory::PassMemory;
@@ -172,7 +172,7 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
                     cmp_address: {
                         if addr_result == Value::Unknown {
                             self.safe_to_move.replace(false);
-                            NopBucket { id: new_id() }.allocate()
+                            NopBucket { id: 0 }.allocate()
                         } else {
                             addr_result.to_value_bucket(self.mem).allocate()
                         }
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index 8c54926d2..ad6b14958 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -70,8 +70,6 @@ impl<'d> LoopUnrollPass<'d> {
         // Compute loop iteration count. If unknown, return immediately.
         let recorder = EnvRecorder::new(self.global_data, &self.memory);
         {
-            //TODO: This has the wrong scope if an inner function w/ fixed params will be processed! Need test case for it.
-            //  Can't make it crash. Maybe it's not activating in current setup, it was only when I tried to process the other functions?
             let interpreter = self.memory.build_interpreter(self.global_data, &recorder);
             let mut inner_env = env.clone();
             loop {

From 7cedb6735a6d27e2f3f97321cd11572a37ebd0cf Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 27 Sep 2023 13:55:03 -0500
Subject: [PATCH 40/42] use different label for flattened loops vs branches

---
 .../src/passes/conditional_flattening.rs      | 24 +++++++-------
 circuit_passes/src/passes/loop_unroll/mod.rs  |  1 +
 circuit_passes/src/passes/mod.rs              |  7 +++--
 .../block_bucket.rs                           |  1 +
 .../ir_interface.rs                           | 31 ++++++++++---------
 5 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 13bdcf7bd..56b9dce83 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -135,18 +135,20 @@ impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
                 message_id: bucket.message_id,
                 body: code.clone(),
                 n_iters: 1,
+                label: format!("fold_{}", side),
             };
-            return self.transform_block_bucket(&block);
-        }
-        BranchBucket {
-            id: new_id(),
-            source_file_id: bucket.source_file_id,
-            line: bucket.line,
-            message_id: bucket.message_id,
-            cond: self.transform_instruction(&bucket.cond),
-            if_branch: self.transform_instructions(&bucket.if_branch),
-            else_branch: self.transform_instructions(&bucket.else_branch),
+            self.transform_block_bucket(&block)
+        } else {
+            BranchBucket {
+                id: new_id(),
+                source_file_id: bucket.source_file_id,
+                line: bucket.line,
+                message_id: bucket.message_id,
+                cond: self.transform_instruction(&bucket.cond),
+                if_branch: self.transform_instructions(&bucket.if_branch),
+                else_branch: self.transform_instructions(&bucket.else_branch),
+            }
+            .allocate()
         }
-        .allocate()
     }
 }
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index ad6b14958..2625da4c2 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -159,6 +159,7 @@ impl InterpreterObserver for LoopUnrollPass<'_> {
                 message_id: bucket.message_id,
                 body: block_body,
                 n_iters,
+                label: String::from("unrolled_loop"),
             };
             self.continue_inside(&block, env);
             self.replacements.borrow_mut().insert(bucket.id, block.allocate());
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index 7240e3518..a32498791 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -368,6 +368,7 @@ pub trait CircuitTransformationPass {
             message_id: bucket.message_id,
             body: self.transform_instructions(&bucket.body),
             n_iters: bucket.n_iters,
+            label: bucket.label.clone(),
         }
         .allocate()
     }
@@ -408,8 +409,10 @@ pub enum PassKind {
 }
 
 pub struct GlobalPassData {
-    /// Created during loop unrolling, maps generated function name + Env::get_vars_sort
-    /// to location reference in the original function.
+    /// Created during loop unrolling, maps generated function name + UnrolledIterLvars
+    /// (from Env::get_vars_sort) to location reference in the original function. Used
+    /// by ExtractedFuncEnvData to access the original function's Env via the extracted
+    /// function's parameter references.
     pub extract_func_orig_loc: HashMap<String, BTreeMap<UnrolledIterLvars, ToOriginalLocation>>,
 }
 
diff --git a/compiler/src/intermediate_representation/block_bucket.rs b/compiler/src/intermediate_representation/block_bucket.rs
index 12a1f5623..c95e478c1 100644
--- a/compiler/src/intermediate_representation/block_bucket.rs
+++ b/compiler/src/intermediate_representation/block_bucket.rs
@@ -13,6 +13,7 @@ pub struct BlockBucket {
     pub message_id: usize,
     pub body: InstructionList,
     pub n_iters: usize,
+    pub label: String,
 }
 
 impl IntoInstruction for BlockBucket {
diff --git a/compiler/src/intermediate_representation/ir_interface.rs b/compiler/src/intermediate_representation/ir_interface.rs
index dddeec0fa..96c6e3a50 100644
--- a/compiler/src/intermediate_representation/ir_interface.rs
+++ b/compiler/src/intermediate_representation/ir_interface.rs
@@ -258,24 +258,25 @@ impl Instruction {
     pub fn label_name(&self, idx: u32) -> String {
         use Instruction::*;
         match self {
-            Value(_v) => format!("value{}", idx),
-            Load(_v) => format!("load{}", idx),
-            Store(_v) => format!("store{}", idx),
-            Compute(_v) => format!("compute{}", idx),
-            Call(_v) => format!("call{}", idx),
-            Branch(_v) => format!("branch{}", idx),
-            Return(_v) => format!("return{}", idx),
-            Loop(_v) => format!("loop{}", idx),
-            Assert(_v) => format!("assert{}", idx),
-            CreateCmp(_v) => format!("create_cmp{}", idx),
-            Log(_v) => format!("log{}", idx),
+            Value(_) => format!("value{}", idx),
+            Load(_) => format!("load{}", idx),
+            Store(_) => format!("store{}", idx),
+            Compute(_) => format!("compute{}", idx),
+            Call(_) => format!("call{}", idx),
+            Branch(_) => format!("branch{}", idx),
+            Return(_) => format!("return{}", idx),
+            Loop(_) => format!("loop{}", idx),
+            Assert(_) => format!("assert{}", idx),
+            CreateCmp(_) => format!("create_cmp{}", idx),
+            Log(_) => format!("log{}", idx),
             // We use the label name of the wrapped instruction
             Constraint(v) => match v {
                 ConstraintBucket::Substitution(i) => i,
-                ConstraintBucket::Equality(i) => i
-            }.label_name(idx),
-            Block(_) => format!("unrolled_loop{}", idx),
-            Nop(_) => format!("nop{}", idx)
+                ConstraintBucket::Equality(i) => i,
+            }
+            .label_name(idx),
+            Block(BlockBucket { label, .. }) => format!("{}{}", label, idx),
+            Nop(_) => format!("nop{}", idx),
         }
     }
 }

From bd4f80faaab88c7bd4966cf0ff997d6312a284b0 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 11 Oct 2023 15:59:09 -0500
Subject: [PATCH 41/42] address CR

---
 .../env/extracted_func_env.rs                 | 36 ++++---------------
 circuit_passes/src/bucket_interpreter/mod.rs  | 13 -------
 .../src/passes/loop_unroll/body_extractor.rs  |  3 +-
 .../passes/loop_unroll/loop_env_recorder.rs   | 17 +++++----
 circuit_passes/src/passes/loop_unroll/mod.rs  | 17 ---------
 circuit_passes/src/passes/simplification.rs   |  2 --
 code_producers/src/llvm_elements/functions.rs | 14 +++-----
 code_producers/src/llvm_elements/mod.rs       |  2 +-
 code_producers/src/llvm_elements/template.rs  | 14 ++++----
 .../load_bucket.rs                            |  4 +--
 .../store_bucket.rs                           |  2 +-
 11 files changed, 36 insertions(+), 88 deletions(-)

diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
index 4ee2ed48a..57685e7e4 100644
--- a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -85,7 +85,6 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        // println!("[FINDME] get_subcmp_signal({subcmp_idx}, {signal_idx}) = {} in {}", res, self);
         res
     }
 
@@ -166,7 +165,6 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        // println!("[FINDME] subcmp_counter_is_zero({subcmp_idx}) = {} in {}", res, self);
         res
     }
 
@@ -190,7 +188,6 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        // println!("[FINDME] subcmp_counter_equal_to({subcmp_idx}, {value}) = {} in {}", res, self);
         res
     }
 
@@ -217,17 +214,11 @@ impl<'a> ExtractedFuncEnvData<'a> {
         ExtractedFuncEnvData { base: Box::new(self.base.set_all_to_unk()), remap: self.remap }
     }
 
-    pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
-        todo!("set_subcmp_to_unk({})", subcmp_idx);
-        // ExtractedFuncEnvData {
-        //     base: Box::new(self.base.set_subcmp_to_unk(subcmp_idx)),
-        //     remap: self.remap,
-        // }
+    pub fn set_subcmp_to_unk(self, _subcmp_idx: usize) -> Self {
+        unreachable!()
     }
 
     pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
-        // let temp_str_self = format!("{}", self);
-        // let temp_str_value = format!("{}", value);
         //NOTE: This is only called by BucketInterpreter::store_value_in_address.
         //Use the map from loop unrolling to convert the SubcmpSignal reference back
         //  into the proper reference (reversing ExtractedFunctionLocationUpdater).
@@ -257,15 +248,10 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        // println!(
-        //     "[FINDME] set_subcmp_signal({subcmp_idx}, {signal_idx}, {})\n BEFORE: {}\n AFTER: {}",
-        //     temp_str_value, temp_str_self, new_env
-        // );
         ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap }
     }
 
     pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
-        // let temp_str_self = format!("{}", self);
         let new_env = match self.remap.get(&subcmp_idx).cloned() {
             //NOTE: The ArgIndex::SubCmp 'arena' and 'counter' parameters were not added
             //  to the 'remap' (producing None result here) because those parameters are
@@ -289,10 +275,6 @@ impl<'a> ExtractedFuncEnvData<'a> {
                 }
             }
         };
-        // println!(
-        //     "[FINDME] decrease_subcmp_counter({subcmp_idx})\n BEFORE: {}\n AFTER: {}",
-        //     temp_str_self, new_env
-        // );
         ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap }
     }
 
@@ -309,15 +291,11 @@ impl<'a> ExtractedFuncEnvData<'a> {
 
     pub fn create_subcmp(
         self,
-        name: &'a String,
-        base_index: usize,
-        count: usize,
-        template_id: usize,
+        _name: &'a String,
+        _base_index: usize,
+        _count: usize,
+        _template_id: usize,
     ) -> Self {
-        todo!("create_subcmp({name},{base_index},{count},{template_id})");
-        // ExtractedFuncEnvData {
-        //     base: Box::new(self.base.create_subcmp(name, base_index, count, template_id)),
-        //     remap: self.remap,
-        // }
+        unreachable!()
     }
 }
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index 320c0cbff..afe8d1f41 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -318,10 +318,6 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
                 }
             }
             AddressType::SubcmpSignal { cmp_address, input_information, .. } => {
-                // println!(
-                //     "cmp_address = {:?}, input_information = {:?}",
-                //     cmp_address, input_information
-                // );
                 let (addr, env) = self.execute_instruction(cmp_address, env, observe);
                 let addr = addr
                     .expect(
@@ -492,9 +488,6 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
             };
             (Some(v), env)
         };
-        // println!("[execute_call_bucket] {:?}", bucket);
-        // println!(" -> value = {:?}", res.0);
-        // println!(" -> new env = {}", res.1);
 
         // Write the result in the destination according to the ReturnType
         match &bucket.return_info {
@@ -611,16 +604,10 @@ impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
         return match cond_bool_result {
             None => (None, None, env),
             Some(true) => {
-                // if cfg!(debug_assertions) {
-                //     println!("Running then branch");
-                // }
                 let (ret, env) = self.execute_instructions(&true_branch, env, observe);
                 (ret, Some(true), env)
             }
             Some(false) => {
-                // if cfg!(debug_assertions) {
-                //     println!("Running else branch");
-                // }
                 let (ret, env) = self.execute_instructions(&false_branch, env, observe);
                 (ret, Some(false), env)
             }
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
index 6e69ba71d..dbbcaf250 100644
--- a/circuit_passes/src/passes/loop_unroll/body_extractor.rs
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -264,7 +264,8 @@ impl LoopBodyExtractor {
             .allocate(),
         );
         // Create new function to hold the copied body
-        // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
+        // NOTE: This name must start with `GENERATED_FN_PREFIX` (which is the prefix
+        //  of `LOOP_BODY_FN_PREFIX`) so that `ExtractedFunctionCtx` will be used.
         let func_name = format!("{}{}", LOOP_BODY_FN_PREFIX, new_id());
         let new_func = Box::new(FunctionCodeInfo {
             source_file_id: bucket.source_file_id,
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 962468cc2..8f57cf2d4 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -27,12 +27,16 @@ impl<'a> VariableValues<'a> {
 
 impl Debug for VariableValues<'_> {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        // write!(
-        //     f,
-        //     "\n{{\n env_at_header = {}\n loadstore_to_index = {:?}\n}}",
-        //     self.env_at_header, self.loadstore_to_index
-        // )
-        write!(f, "\n  loadstore_to_index = {:?}\n", self.loadstore_to_index)
+        let print_header_env = false;
+        if print_header_env {
+            write!(
+                f,
+                "\n{{\n env_at_header = {}\n loadstore_to_index = {:?}\n}}",
+                self.env_at_header, self.loadstore_to_index
+            )
+        } else {
+            write!(f, "\n  loadstore_to_index = {:?}\n", self.loadstore_to_index)
+        }
     }
 }
 
@@ -130,7 +134,6 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
         //  not give the same result when done at the call site, outside of the new function.
         let interp = self.mem.build_interpreter(self.global_data, self);
         let (idx_loc, _) = interp.execute_instruction(location, env.clone(), false);
-        // println!("--   LOC: var/sig[{:?}]", idx_loc); //TODO: TEMP
         if let Some(idx_loc) = idx_loc {
             let (idx_header, _) =
                 interp.execute_instruction(location, self.get_header_env_clone(), false);
diff --git a/circuit_passes/src/passes/loop_unroll/mod.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
index 2625da4c2..d20b21b59 100644
--- a/circuit_passes/src/passes/loop_unroll/mod.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -52,21 +52,6 @@ impl<'d> LoopUnrollPass<'d> {
     }
 
     fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
-        // {
-        //     println!("\nTry unrolling loop {}:", bucket.id); //TODO: TEMP
-        //     for (i, s) in bucket.body.iter().enumerate() {
-        //         println!(
-        //             "[{}/{}]{}",
-        //             i + 1,
-        //             bucket.body.len(),
-        //             compiler::intermediate_representation::ToSExp::to_sexp(&**s).to_pretty(100)
-        //         );
-        //     }
-        //     for (i, s) in bucket.body.iter().enumerate() {
-        //         println!("[{}/{}]{:?}", i + 1, bucket.body.len(), s);
-        //     }
-        //     println!("LOOP ENTRY env {}", env); //TODO: TEMP
-        // }
         // Compute loop iteration count. If unknown, return immediately.
         let recorder = EnvRecorder::new(self.global_data, &self.memory);
         {
@@ -87,7 +72,6 @@ impl<'d> LoopUnrollPass<'d> {
                 inner_env = new_env;
             }
         }
-        // println!("recorder = {:?}", recorder); //TODO: TEMP
 
         let mut block_body = vec![];
         if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() {
@@ -122,7 +106,6 @@ impl<'d> LoopUnrollPass<'d> {
     // Will take the unrolled loop and interpretate it
     // checking if new loop buckets appear
     fn continue_inside(&self, bucket: &BlockBucket, env: &Env) {
-        // println!("\ncontinue_inside {:?} with {} ", bucket, env);
         let interpreter = self.memory.build_interpreter(self.global_data, self);
         let env = Env::new_unroll_block_env(env.clone(), &self.extractor);
         interpreter.execute_block_bucket(bucket, env, true);
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index 9a3362370..81275e883 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -49,7 +49,6 @@ impl InterpreterObserver for SimplificationPass<'_> {
         let (eval, _) = interpreter.execute_compute_bucket(bucket, env, false);
         let eval = eval.expect("Compute bucket must produce a value!");
         if !eval.is_unknown() {
-            // println!("\nCan replace {:?} with {}", bucket, eval);
             self.compute_replacements.borrow_mut().insert(bucket.clone(), eval);
             return false;
         }
@@ -91,7 +90,6 @@ impl InterpreterObserver for SimplificationPass<'_> {
         if let Some(eval) = eval {
             // Call buckets may not return a value directly
             if !eval.is_unknown() {
-                // println!("\nCan replace {:?} with {}", bucket, eval);
                 self.call_replacements.borrow_mut().insert(bucket.clone(), eval);
                 return false;
             }
diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index 4fa04e0b9..2d0c1cec7 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -184,11 +184,7 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
         _producer: &dyn LLVMIRProducer<'a>,
         _id: AnyValueEnum<'a>,
     ) -> PointerValue<'a> {
-        //NOTE: only used by CreateCmpBucket::produce_llvm_ir
-        //TODO: I think instead of ID defining an array index in the gep, it will need to define a static index
-        //  in an array of subcomponents in this context (i.e. self.subcmps[id] with offsets [0,0]).
-        todo!("load_subcmp {} from {:?}", _id, self.args);
-        //create_gep(producer, self.subcmps, &[zero(producer), id.into_int_value()]).into_pointer_value()
+        unreachable!()
     }
 
     fn load_subcmp_addr(
@@ -205,11 +201,11 @@ impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
 
     fn load_subcmp_counter(
         &self,
-        producer: &dyn LLVMIRProducer<'a>,
+        _producer: &dyn LLVMIRProducer<'a>,
         _id: AnyValueEnum<'a>,
-    ) -> PointerValue<'a> {
-        // Use null pointer to force StoreBucket::produce_llvm_ir to skip counter increment.
-        producer.context().i32_type().ptr_type(Default::default()).const_null()
+    ) -> Option<PointerValue<'a>> {
+        // Use None to force StoreBucket::produce_llvm_ir to skip the counter decrement.
+        None
     }
 
     fn get_signal(
diff --git a/code_producers/src/llvm_elements/mod.rs b/code_producers/src/llvm_elements/mod.rs
index 6b7cc65ed..3a164b76d 100644
--- a/code_producers/src/llvm_elements/mod.rs
+++ b/code_producers/src/llvm_elements/mod.rs
@@ -66,7 +66,7 @@ pub trait TemplateCtx<'a> {
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
-    ) -> PointerValue<'a>;
+    ) -> Option<PointerValue<'a>>;
 
     /// Returns a pointer to the signal associated to the index
     fn get_signal(
diff --git a/code_producers/src/llvm_elements/template.rs b/code_producers/src/llvm_elements/template.rs
index 3d3090c0f..d23880676 100644
--- a/code_producers/src/llvm_elements/template.rs
+++ b/code_producers/src/llvm_elements/template.rs
@@ -90,13 +90,15 @@ impl<'a> TemplateCtx<'a> for StdTemplateCtx<'a> {
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
-    ) -> PointerValue<'a> {
-        create_gep(
-            producer,
-            self.subcmps,
-            &[zero(producer), id.into_int_value(), create_literal_u32(producer, 1)],
+    ) -> Option<PointerValue<'a>> {
+        Some(
+            create_gep(
+                producer,
+                self.subcmps,
+                &[zero(producer), id.into_int_value(), create_literal_u32(producer, 1)],
+            )
+            .into_pointer_value(),
         )
-        .into_pointer_value()
     }
 
     fn get_signal(
diff --git a/compiler/src/intermediate_representation/load_bucket.rs b/compiler/src/intermediate_representation/load_bucket.rs
index 1da8acc6d..564715379 100644
--- a/compiler/src/intermediate_representation/load_bucket.rs
+++ b/compiler/src/intermediate_representation/load_bucket.rs
@@ -95,7 +95,7 @@ impl WriteLLVMIR for LoadBucket {
                             let addr = cmp_address.produce_llvm_ir(producer)
                                 .expect("The address of a subcomponent must yield a value!");
                             if *counter_override {
-                                return producer.template_ctx().load_subcmp_counter(producer, addr)
+                                return producer.template_ctx().load_subcmp_counter(producer, addr).expect("could not find counter!")
                             } else {
                                 let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
                                 create_gep(producer, subcmp, &[zero(producer)])
@@ -113,7 +113,7 @@ impl WriteLLVMIR for LoadBucket {
                     AddressType::SubcmpSignal { cmp_address, counter_override, ..  } => {
                         let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
                         if *counter_override {
-                            producer.template_ctx().load_subcmp_counter(producer, addr)
+                            producer.template_ctx().load_subcmp_counter(producer, addr).expect("could not find counter!")
                         } else {
                             let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
                             if subcmp.get_type().get_element_type().is_array_type() {
diff --git a/compiler/src/intermediate_representation/store_bucket.rs b/compiler/src/intermediate_representation/store_bucket.rs
index 5265b4a82..c2a7182d6 100644
--- a/compiler/src/intermediate_representation/store_bucket.rs
+++ b/compiler/src/intermediate_representation/store_bucket.rs
@@ -177,7 +177,7 @@ impl StoreBucket{
         if let AddressType::SubcmpSignal { cmp_address, .. } = &dest_address_type {
             let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
             let counter = producer.template_ctx().load_subcmp_counter(producer, addr);
-            if !counter.is_null() {
+            if let Some(counter) = counter {
                 let value = create_load_with_name(producer, counter, "load.subcmp.counter");
                 let new_value = create_sub_with_name(producer, value.into_int_value(), create_literal_u32(producer, 1), "decrement.counter");
                 assert_eq!(1, context.size, "unhandled array store");

From 73575e73d0e7d7955500aff2126946dbbabd4619 Mon Sep 17 00:00:00 2001
From: Tim Hoffman <timothy.hoffman@veridise.com>
Date: Wed, 11 Oct 2023 16:59:56 -0500
Subject: [PATCH 42/42] bring in change from another PR

---
 circom/tests/subcmps/mapped3.circom                       | 1 -
 circom/tests/subcmps/mapped4.circom                       | 1 -
 .../src/passes/loop_unroll/loop_env_recorder.rs           | 8 +++++++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/circom/tests/subcmps/mapped3.circom b/circom/tests/subcmps/mapped3.circom
index 8b2de038b..6393b7fdc 100644
--- a/circom/tests/subcmps/mapped3.circom
+++ b/circom/tests/subcmps/mapped3.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template ArrayOp(q) {
     signal input inp[15];
diff --git a/circom/tests/subcmps/mapped4.circom b/circom/tests/subcmps/mapped4.circom
index dba6889df..7312cbc80 100644
--- a/circom/tests/subcmps/mapped4.circom
+++ b/circom/tests/subcmps/mapped4.circom
@@ -1,7 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
 // RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
-// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template MatrixOp(q) {
     signal input inp[5][3];
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
index 8f57cf2d4..2c79f94fd 100644
--- a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -148,7 +148,13 @@ impl<'a, 'd> EnvRecorder<'a, 'd> {
 
     fn compute_index_from_rule(&self, env: &Env, loc: &LocationRule) -> Value {
         match loc {
-            LocationRule::Mapped { .. } => todo!(), //not sure if/how to handle that
+            LocationRule::Mapped { .. } => {
+                //TODO: It's not an array index in this case, at least not immediately, but I think it can
+                //  ultimately be converted to one because the subcmp storage is an array of values. Is
+                //  that value known now? Do I also need the AddressType to compute the correct index?
+                //SEE: https://veridise.atlassian.net/browse/VAN-704
+                Value::Unknown
+            }
             LocationRule::Indexed { location, .. } => self.compute_index_from_inst(env, location),
         }
     }