diff --git a/Cargo.lock b/Cargo.lock
index 8fac819e5..54a515fe2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -218,7 +218,9 @@ dependencies = [
  "circom_algebra",
  "code_producers",
  "compiler",
+ "const_format",
  "constraint_generation",
+ "indexmap 2.0.0",
  "intervallum",
  "program_structure",
 ]
@@ -285,6 +287,26 @@ dependencies = [
  "rand 0.8.5",
 ]
 
+[[package]]
+name = "const_format"
+version = "0.2.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c990efc7a285731f9a4378d81aff2f0e85a2c8781a05ef0f8baa8dac54d0ff48"
+dependencies = [
+ "const_format_proc_macros",
+]
+
+[[package]]
+name = "const_format_proc_macros"
+version = "0.2.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e026b6ce194a874cb9cf32cd5772d1ef9767cc8fcb5765948d74f37a9d8b2bf6"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
 [[package]]
 name = "constant_tracking"
 version = "2.0.0"
@@ -424,6 +446,12 @@ dependencies = [
  "log",
 ]
 
+[[package]]
+name = "equivalent"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+
 [[package]]
 name = "errno"
 version = "0.3.1"
@@ -575,6 +603,12 @@ version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
 
+[[package]]
+name = "hashbrown"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a"
+
 [[package]]
 name = "hermit-abi"
 version = "0.1.19"
@@ -614,7 +648,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99"
 dependencies = [
  "autocfg 1.1.0",
- "hashbrown",
+ "hashbrown 0.12.3",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.14.0",
 ]
 
 [[package]]
@@ -982,7 +1026,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4"
 dependencies = [
  "fixedbitset",
- "indexmap",
+ "indexmap 1.9.3",
 ]
 
 [[package]]
diff --git a/circom/src/compilation_user.rs b/circom/src/compilation_user.rs
index 7732d51f3..dd6e3b554 100644
--- a/circom/src/compilation_user.rs
+++ b/circom/src/compilation_user.rs
@@ -1,14 +1,12 @@
 use ansi_term::Colour;
 use circuit_passes::passes::PassManager;
-use compiler::compiler_interface;
-use compiler::compiler_interface::{Config, VCP};
+use compiler::compiler_interface::{self, Config, VCP};
 use program_structure::error_definition::Report;
 use program_structure::error_code::ReportCode;
 use program_structure::file_definition::FileLibrary;
 use program_structure::program_archive::ProgramArchive;
 use crate::VERSION;
 
-
 pub struct CompilerConfig {
     pub js_folder: String,
     pub wasm_name: String,
@@ -61,16 +59,16 @@ pub fn compile(config: CompilerConfig, program_archive: ProgramArchive, prime: &
     }
 
     if config.llvm_flag {
-        // Only run this passes if we are going to generate LLVM code
+        // Only run the passes if we are going to generate LLVM code
         let pm = PassManager::new();
         circuit = pm
-            .schedule_loop_unroll_pass(prime)
-            .schedule_conditional_flattening_pass(prime)
-            .schedule_mapped_to_indexed_pass(prime)
-            .schedule_unknown_index_sanitization_pass(prime)
-            .schedule_simplification_pass(prime)
-            .schedule_deterministic_subcmp_invoke_pass(prime)
-            .transform_circuit(circuit);
+            .schedule_loop_unroll_pass()
+            .schedule_conditional_flattening_pass()
+            .schedule_mapped_to_indexed_pass()
+            .schedule_unknown_index_sanitization_pass()
+            .schedule_simplification_pass()
+            .schedule_deterministic_subcmp_invoke_pass()
+            .transform_circuit(circuit, prime);
         compiler_interface::write_llvm_ir(
             &mut circuit,
             &program_archive,
@@ -78,11 +76,7 @@ pub fn compile(config: CompilerConfig, program_archive: ProgramArchive, prime: &
             &config.llvm_file,
             config.clean_llvm,
         )?;
-        println!(
-          "{} {}",
-            Colour::Green.paint("Written successfully:"),
-            config.llvm_file
-        );
+        println!("{} {}", Colour::Green.paint("Written successfully:"), config.llvm_file);
     }
 
     match (config.wat_flag, config.wasm_flag) {
@@ -124,7 +118,6 @@ pub fn compile(config: CompilerConfig, program_archive: ProgramArchive, prime: &
     Ok(())
 }
 
-
 fn wat_to_wasm(wat_file: &str, wasm_file: &str) -> Result<(), Report> {
     use std::fs::read_to_string;
     use std::fs::File;
diff --git a/circom/tests/loops/call_inside_loop.circom b/circom/tests/loops/call_inside_loop.circom
index c0bfc1822..912ce3287 100644
--- a/circom/tests/loops/call_inside_loop.circom
+++ b/circom/tests/loops/call_inside_loop.circom
@@ -1,8 +1,8 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
-// XFAIL: .*
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
+//arena = { a[0], a[1], n, b, c, d, e, f, g }
 function fun(a, n, b, c, d, e, f, g) {
 	var x[5];
     for (var i = 0; i < n; i++) {
@@ -11,6 +11,8 @@ function fun(a, n, b, c, d, e, f, g) {
 	return x[0] + x[2] + x[4];
 }
 
+//signal_arena = { out, in }
+//lvars = { m, n, a[0], a[1], b[0], b[1], i }
 template CallInLoop(n, m) {
     signal input in;
     signal output out;
@@ -27,18 +29,6 @@ template CallInLoop(n, m) {
 
 component main = CallInLoop(2, 3);
 
-//// Use the block labels to check that the loop is NOT unrolled
-//CHECK-LABEL: define i256 @fun_{{[0-9]+}}
-//CHECK-SAME: (i256* %[[ARG:[0-9]+]])
-//CHECK-NOT: unrolled_loop{{.*}}:
-//CHECK: loop.cond{{.*}}:
-//CHECK: loop.body{{.*}}:
-//CHECK: loop.end{{.*}}:
-//CHECK-NOT: unrolled_loop{{.*}}:
-//CHECK:   }
-
-//signal_arena = { out, in }
-//lvars = { m, n, a[0], a[1], i, b[0], b[1] }
 //
 //     var a[2];
 //     i = 0;
@@ -54,6 +44,149 @@ component main = CallInLoop(2, 3);
 //     i = 2;
 //     out <-- b[0];
 //
-//CHECK-LABEL: define void @CallInLoop_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//CHECK: TODO: Code produced currently is incorrect! See https://veridise.atlassian.net/browse/VAN-611
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 3, i256 %1)
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %5, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %call1
+//CHECK-EMPTY: 
+//CHECK-NEXT: call1:
+//CHECK-NEXT:   %fun_0_arena = alloca [15 x i256], align 8
+//CHECK-NEXT:   %0 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 0
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   call void @fr_copy_n(i256* %1, i256* %0, i32 2)
+//CHECK-NEXT:   %2 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 2
+//CHECK-NEXT:   store i256 2, i256* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 3
+//CHECK-NEXT:   store i256 3, i256* %3, align 4
+//CHECK-NEXT:   %4 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 4
+//CHECK-NEXT:   store i256 3, i256* %4, align 4
+//CHECK-NEXT:   %5 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 5
+//CHECK-NEXT:   store i256 3, i256* %5, align 4
+//CHECK-NEXT:   %6 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 6
+//CHECK-NEXT:   store i256 3, i256* %6, align 4
+//CHECK-NEXT:   %7 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 7
+//CHECK-NEXT:   store i256 3, i256* %7, align 4
+//CHECK-NEXT:   %8 = getelementptr [15 x i256], [15 x i256]* %fun_0_arena, i32 0, i32 8
+//CHECK-NEXT:   store i256 3, i256* %8, align 4
+//CHECK-NEXT:   %9 = bitcast [15 x i256]* %fun_0_arena to i256*
+//CHECK-NEXT:   %call.fun_0 = call i256 @fun_0(i256* %9)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %11)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 4
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %call.fun_0, i256* %12, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %14, i256 1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @CallInLoop_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [7 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 2, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop6
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop6:
+//CHECK-NEXT:   %6 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %7, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %8)
+//CHECK-NEXT:   %9 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   br label %store7
+//CHECK-EMPTY: 
+//CHECK-NEXT: store7:
+//CHECK-NEXT:   %12 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %12, align 4
+//CHECK-NEXT:   br label %store8
+//CHECK-EMPTY: 
+//CHECK-NEXT: store8:
+//CHECK-NEXT:   %13 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %13, align 4
+//CHECK-NEXT:   br label %store9
+//CHECK-EMPTY: 
+//CHECK-NEXT: store9:
+//CHECK-NEXT:   %14 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %14, align 4
+//CHECK-NEXT:   br label %unrolled_loop10
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop10:
+//CHECK-NEXT:   %15 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0)
+//CHECK-NEXT:   %16 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %16, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store11
+//CHECK-EMPTY: 
+//CHECK-NEXT: store11:
+//CHECK-NEXT:   %17 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %18, i256* %19, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/fib_input.circom b/circom/tests/loops/fib_input.circom
index 6b664f2e6..e821b45bb 100644
--- a/circom/tests/loops/fib_input.circom
+++ b/circom/tests/loops/fib_input.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template Fibonacci() {
     signal input nth_fib;
diff --git a/circom/tests/loops/fib_template.circom b/circom/tests/loops/fib_template.circom
index c3ee4e755..003e1fcf1 100644
--- a/circom/tests/loops/fib_template.circom
+++ b/circom/tests/loops/fib_template.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template FibonacciTmpl(N) {
     signal output out;
diff --git a/circom/tests/loops/for_known.circom b/circom/tests/loops/for_known.circom
index 77b7f14ae..acbecf2ed 100644
--- a/circom/tests/loops/for_known.circom
+++ b/circom/tests/loops/for_known.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ForKnown(N) {
     signal output out;
diff --git a/circom/tests/loops/for_unknown.circom b/circom/tests/loops/for_unknown.circom
index 888061553..b3d522043 100644
--- a/circom/tests/loops/for_unknown.circom
+++ b/circom/tests/loops/for_unknown.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ForUnknown() {
     signal input in;
diff --git a/circom/tests/loops/for_unknown_index.circom b/circom/tests/loops/for_unknown_index.circom
index 87bde3fbb..091cbd8d7 100644
--- a/circom/tests/loops/for_unknown_index.circom
+++ b/circom/tests/loops/for_unknown_index.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ForUnknownIndex() {
     signal input in;
diff --git a/circom/tests/loops/init_nonzero.circom b/circom/tests/loops/init_nonzero.circom
new file mode 100644
index 000000000..9ac1520a6
--- /dev/null
+++ b/circom/tests/loops/init_nonzero.circom
@@ -0,0 +1,38 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// Ensure that a for-loop iteration variable initialized to a non-zero value is handled properly.
+template NonZeroInit() {
+    signal input a[9];
+    signal output b[9];
+
+    for (var i = 4; i < 7; i++) { // starts at 4: assigns b[4..6] from a[4..6]
+        b[i] <-- a[i];
+    }
+    for (var i = 7; i < 9; i++) { // starts at 7: assigns b[7..8] from a[7..8]
+        b[i] <-- a[i];
+    }
+    for (var i = 0; i < 4; i++) { // starts at 0, deliberately last: assigns b[0..3] from a[0..3]
+        b[i] <-- a[i];
+    }
+}
+
+component main = NonZeroInit();
+
+//CHECK-LABEL: define void @NonZeroInit_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %[[VAR1:[0-9]+]] = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %{{.*}}[[VAR1]], align 4
+//CHECK-NEXT:   br label %{{.*}}
+//
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %[[VAR2:[0-9]+]] = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 7, i256* %{{.*}}[[VAR2]], align 4
+//CHECK-NEXT:   br label %{{.*}}
+//
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %[[VAR3:[0-9]+]] = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[VAR3]], align 4
+//CHECK-NEXT:   br label %{{.*}}
diff --git a/circom/tests/loops/inner_conditional_1.circom b/circom/tests/loops/inner_conditional_1.circom
new file mode 100644
index 000000000..85c98fca5
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_1.circom
@@ -0,0 +1,120 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// The `if` condition depends on the loop variable, so it is known when the loop is unrolled in place but not once the loop body is extracted into a new function.
+template InnerConditional1(N) {
+    signal output out;
+
+    var acc = 0;
+    for (var i = 1; i <= N; i++) {
+        if (i < 5) { // depends on the loop variable i, not on any signal
+            acc += i; // taken for i = 1..4
+        } else {
+            acc -= i; // taken for i = 5..N
+        }
+    }
+
+    out <-- acc; // for N = 10: (1+2+3+4) - (5+...+10) = -35
+}
+
+component main = InnerConditional1(10);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %1, i256 5)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %3, i256 %5)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 %10)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %11, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %13, i256 1)
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %14, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional1_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 10, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
+//CHECK-NEXT:   %5 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0)
+//CHECK-NEXT:   %11 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %11, [0 x i256]* %0)
+//CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0)
+//CHECK-NEXT:   %13 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %13, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 21888242871839275222246405745257275088548364400416034343698204186575808495582, i256* %14, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_10.circom b/circom/tests/loops/inner_conditional_10.circom
new file mode 100644
index 000000000..066ad245c
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_10.circom
@@ -0,0 +1,93 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Sigma() { // Stub subcomponent: declares one input and one output signal and nothing else.
+    signal input inp;
+    signal output out; // never assigned in this template
+}
+
+template Poseidon() { // Instantiates a Sigma only on the first and last of the four loop iterations.
+    signal input inp;
+
+    component sigmaF[2];
+
+    for (var i=0; i<4; i++) {
+        if (i < 1 || i >= 3) { // true only for i = 0 and i = 3
+            var k = i < 1 ? 0 : 1; // i = 0 selects sigmaF[0]; i = 3 selects sigmaF[1]
+            sigmaF[k] = Sigma();
+            sigmaF[k].inp <== inp; // forwards this template's input to the subcomponent
+        }
+    }
+}
+
+component main = Poseidon();
+
+//CHECK-LABEL: define void @Poseidon_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Sigma_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @Sigma_0_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %4 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 1
+//CHECK-NEXT:   store i256 %6, i256* %9, align 4
+//CHECK-NEXT:   %10 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter = load i32, i32* %10, align 4
+//CHECK-NEXT:   %decrement.counter = sub i32 %load.subcmp.counter, 1
+//CHECK-NEXT:   store i32 %decrement.counter, i32* %10, align 4
+//CHECK-NEXT:   %11 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %12 = load [0 x i256]*, [0 x i256]** %11, align 8
+//CHECK-NEXT:   call void @Sigma_0_run([0 x i256]* %12)
+//CHECK-NEXT:   %13 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %6, i256 %13, i1* %constraint)
+//CHECK-NEXT:   %14 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 1, i256* %14, align 4
+//CHECK-NEXT:   %15 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %15, align 4
+//CHECK-NEXT:   %16 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %16, align 4
+//CHECK-NEXT:   %17 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %17, align 4
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   %19 = load i256, i256* %18, align 4
+//CHECK-NEXT:   %20 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %21 = load [0 x i256]*, [0 x i256]** %20, align 8
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %21, i32 0, i32 1
+//CHECK-NEXT:   store i256 %19, i256* %22, align 4
+//CHECK-NEXT:   %23 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter1 = load i32, i32* %23, align 4
+//CHECK-NEXT:   %decrement.counter2 = sub i32 %load.subcmp.counter1, 1
+//CHECK-NEXT:   store i32 %decrement.counter2, i32* %23, align 4
+//CHECK-NEXT:   %24 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %25 = load [0 x i256]*, [0 x i256]** %24, align 8
+//CHECK-NEXT:   call void @Sigma_0_run([0 x i256]* %25)
+//CHECK-NEXT:   %26 = load i256, i256* %22, align 4
+//CHECK-NEXT:   %constraint3 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %19, i256 %26, i1* %constraint3)
+//CHECK-NEXT:   %27 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %27, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_2.circom b/circom/tests/loops/inner_conditional_2.circom
new file mode 100644
index 000000000..5f0a7648f
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_2.circom
@@ -0,0 +1,218 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// if condition is known constant: it depends only on template parameter T, so it is fixed per instantiation
+template InnerConditional2(N, T) {
+    signal output out;
+
+    var acc = 1;
+    for (var i = 1; i <= N; i++) {
+        if (T == 0) {
+            acc += i; // T == 0: running sum, ends at 1 + (1 + 2 + ... + N)
+        } else {
+            acc *= i; // T != 0: running product, ends at N!
+        }
+    }
+
+    out <-- acc; // the checks below expect this as a constant store (11 for N=4,T=0 and 120 for N=5,T=1)
+}
+
+template runner() { // instantiates InnerConditional2 once per branch so both variants are generated
+    signal output out;
+
+    component a = InnerConditional2(4, 0); // N = 4, T = 0: addition branch
+    component b = InnerConditional2(5, 1); // N = 5, T = 1: multiplication branch
+
+    out <-- a.out + b.out;
+}
+
+component main = runner();
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %4, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %7, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_mul, i256* %4, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional2_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [4 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %4, align 4
+//CHECK-NEXT:   br label %unrolled_loop5
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop5:
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   %8 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 11, i256* %9, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional2_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [4 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 5, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %4, align 4
+//CHECK-NEXT:   br label %unrolled_loop5
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop5:
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   %8 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   %9 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %9, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 120, i256* %10, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @runner_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @InnerConditional2_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %3 = load [0 x i256]*, [0 x i256]** %2, align 8
+//CHECK-NEXT:   call void @InnerConditional2_0_run([0 x i256]* %3)
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %4 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @InnerConditional2_1_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   %5 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %6 = load [0 x i256]*, [0 x i256]** %5, align 8
+//CHECK-NEXT:   call void @InnerConditional2_1_run([0 x i256]* %6)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 0
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %11 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %12 = load [0 x i256]*, [0 x i256]** %11, align 8
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %12, i32 0, i32 0
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 %14)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_3.circom b/circom/tests/loops/inner_conditional_3.circom
new file mode 100644
index 000000000..554282a24
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_3.circom
@@ -0,0 +1,109 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// if condition is NOT known: it reads input signal `in`, so it cannot be resolved while unrolling
+template InnerConditional3(N) {
+    signal output out;
+    signal input in;
+
+    var acc = 0;
+    for (var i = 1; i <= N; i++) {
+        if (in == 0) {
+            acc += i; // in == 0: add the loop counter
+        } else {
+            acc -= i; // in != 0: subtract the loop counter
+        }
+    }
+
+    out <-- acc; // acc depends on `in`; the checks below expect it to be loaded from lvars rather than stored as a constant
+}
+
+component main = InnerConditional3(3);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %1, i256 0)
+//CHECK-NEXT:   br i1 %call.fr_eq, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %3, i256 %5)
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %6, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 %10)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %11, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %13, i256 1)
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %14, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional3_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
+//CHECK-NEXT:   %5 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %7 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %8, i256* %9, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_4.circom b/circom/tests/loops/inner_conditional_4.circom
new file mode 100644
index 000000000..54f13c096
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_4.circom
@@ -0,0 +1,101 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// if condition can be known via unrolling (it depends only on loop variable i), arrays used inside indexed on iteration variable
+template InnerConditional4(N) {
+    signal output out[N];
+    signal input in;
+
+    for (var i = 0; i < N; i++) {
+        if (i < 3) {
+            out[i] <-- -in; // iterations i = 0, 1, 2
+        } else {
+            out[i] <-- in; // iterations i = 3 .. N-1
+        }
+    }
+}
+
+component main = InnerConditional4(6);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %1, i256 3)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 6
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %3)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %4, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 6
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional4_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 6, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %3 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %3, [0 x i256]* %0, i256* %4, i256* null)
+//CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0, i256* %6, i256* null)
+//CHECK-NEXT:   %7 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* null)
+//CHECK-NEXT:   %9 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* null, i256* %10)
+//CHECK-NEXT:   %11 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %11, [0 x i256]* %0, i256* null, i256* %12)
+//CHECK-NEXT:   %13 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %13, [0 x i256]* %0, i256* null, i256* %14)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_5.circom b/circom/tests/loops/inner_conditional_5.circom
new file mode 100644
index 000000000..37748c52d
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_5.circom
@@ -0,0 +1,194 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// if condition is known constant (depends only on template parameter T), arrays used inside indexed on iteration variable
+template InnerConditional5(N, T) {
+    signal output out[N];
+
+    for (var i = 0; i < N; i++) {
+        if (T == 0) {
+            out[i] <-- 777; // T == 0: every element gets 777
+        } else {
+            out[i] <-- 999; // T != 0: every element gets 999
+        }
+    }
+}
+
+template runner() { // instantiates InnerConditional5 once per branch so both variants are generated
+    signal output out;
+
+    component a = InnerConditional5(4, 0); // N = 4, T = 0: fills out[] with 777
+    component b = InnerConditional5(5, 1); // N = 5, T = 1: fills out[] with 999
+
+    out <-- a.out[1] + b.out[0];
+}
+
+component main = runner();
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 777, i256* %0, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %2, i256 1)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %3, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 999, i256* %0, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %2, i256 1)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %3, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional5_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional5_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 5, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %8, [0 x i256]* %0, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @runner_2_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @InnerConditional5_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %3 = load [0 x i256]*, [0 x i256]** %2, align 8
+//CHECK-NEXT:   call void @InnerConditional5_0_run([0 x i256]* %3)
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %4 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @InnerConditional5_1_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   %5 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %6 = load [0 x i256]*, [0 x i256]** %5, align 8
+//CHECK-NEXT:   call void @InnerConditional5_1_run([0 x i256]* %6)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 1
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %11 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %12 = load [0 x i256]*, [0 x i256]** %11, align 8
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %12, i32 0, i32 0
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 %14)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_6.circom b/circom/tests/loops/inner_conditional_6.circom
new file mode 100644
index 000000000..233e74b44
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_6.circom
@@ -0,0 +1,114 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// if condition is NOT known (it reads input signal in[i]), arrays used inside indexed on iteration variable
+// UPDATE: the Circom compiler does not allow the commented-out block inside the loop below
+template InnerConditional6(N) {
+    signal output out[N];
+    signal input in[N];
+
+    for (var i = 0; i < N; i++) {
+        // if (in[i] == 0) {
+        //     out[i] <-- 999;
+        // } else {
+        //     out[i] <-- 888;
+        // }
+        var x; // local stand-in so the signal assignment can sit outside the unknown-condition branch
+        if (in[i] == 0) {
+            x = 999;
+        } else {
+            x = 888;
+        }
+        out[i] <-- x;
+    }
+}
+
+component main = InnerConditional6(4);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %0, align 4
+//CHECK-NEXT:   br label %branch2
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch2:
+//CHECK-NEXT:   %1 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %2, i256 0)
+//CHECK-NEXT:   br i1 %call.fr_eq, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 999, i256* %3, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 888, i256* %4, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   br label %store7
+//CHECK-EMPTY: 
+//CHECK-NEXT: store7:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return8
+//CHECK-EMPTY: 
+//CHECK-NEXT: return8:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional6_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %unrolled_loop3
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop3:
+//CHECK-NEXT:   %3 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %3, [0 x i256]* %0, i256* %4, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7, i256* %8)
+//CHECK-NEXT:   %9 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 6
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* %10, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 7
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0, i256* %13, i256* %14)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_7.circom b/circom/tests/loops/inner_conditional_7.circom
new file mode 100644
index 000000000..2ce359d33
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_7.circom
@@ -0,0 +1,269 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template InnerConditional7(N) {
+    signal output out;
+
+    var a[N];
+    for (var i = 0; i < N; i++) {
+        // NOTE: When processing the outer loop, the statements indexed with 'j' are determined
+        //  NOT safe to move into a new function since 'j' is unknown. That results in the outer
+        //  loop unrolling without extracting the body to a new function. Then the three copies
+        //  of the inner loop are processed and their bodies are extracted to new functions and
+        //  replaced with calls to those functions before unrolling. So it ends up creating
+        //  three slightly different functions for this innermost body, one for each iteration
+        //  of the outer loop. Within each of those functions, 'i' is a known fixed value.
+        for (var j = 0; j < N; j++) {
+            if (i > 1) {
+                a[j] += 999;
+            } else {
+                a[j] -= 111;
+            }
+        }
+    }
+
+    out <-- a[0] + a[1];
+}
+
+component main = InnerConditional7(3);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %3)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 777, i256* %4, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %6, i256 111)
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %7, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %3)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 777, i256* %4, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %6, i256 111)
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %7, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %9, i256 1)
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %10, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 777, i256* %2, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %4)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %6)
+//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
+//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 1
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr3
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 111)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %9, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional7_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [6 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %unrolled_loop6
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop6:
+//CHECK-NEXT:   %6 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   %7 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
+//CHECK-NEXT:   %10 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %9, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0, i256 2
+//CHECK-NEXT:   %15 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %15, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0, i256* %14, i256* %16)
+//CHECK-NEXT:   %17 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i256 3
+//CHECK-NEXT:   %20 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %20, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %17, [0 x i256]* %0, i256* %19, i256* %21)
+//CHECK-NEXT:   %22 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %22, align 4
+//CHECK-NEXT:   %23 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %23, align 4
+//CHECK-NEXT:   %24 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %25 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %26 = getelementptr [0 x i256], [0 x i256]* %25, i32 0, i256 1
+//CHECK-NEXT:   %27 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %28 = getelementptr [0 x i256], [0 x i256]* %27, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %24, [0 x i256]* %0, i256* %26, i256* %28)
+//CHECK-NEXT:   %29 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %30 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %30, i32 0, i256 2
+//CHECK-NEXT:   %32 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %32, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %29, [0 x i256]* %0, i256* %31, i256* %33)
+//CHECK-NEXT:   %34 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %35 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 3
+//CHECK-NEXT:   %37 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %37, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %34, [0 x i256]* %0, i256* %36, i256* %38)
+//CHECK-NEXT:   %39 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %39, align 4
+//CHECK-NEXT:   %40 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %40, align 4
+//CHECK-NEXT:   %41 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %42 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %42, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %41, [0 x i256]* %0, i256* %43)
+//CHECK-NEXT:   %44 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %45 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %46 = getelementptr [0 x i256], [0 x i256]* %45, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %44, [0 x i256]* %0, i256* %46)
+//CHECK-NEXT:   %47 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %48 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %49 = getelementptr [0 x i256], [0 x i256]* %48, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %47, [0 x i256]* %0, i256* %49)
+//CHECK-NEXT:   %50 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 3, i256* %50, align 4
+//CHECK-NEXT:   br label %store7
+//CHECK-EMPTY: 
+//CHECK-NEXT: store7:
+//CHECK-NEXT:   %51 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 1554, i256* %51, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_8.circom b/circom/tests/loops/inner_conditional_8.circom
new file mode 100644
index 000000000..2f7c3f217
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_8.circom
@@ -0,0 +1,175 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// Like inner_conditional_7, but with the uses of 'i' and 'j' swapped: the condition tests 'j' and the array is indexed by 'i'.
+template InnerConditional8(N) {
+    signal output out;
+
+    var a[N];
+    for (var i = 0; i < N; i++) {
+        for (var j = 0; j < N; j++) {
+            if (j > 1) {
+                a[i] += 999;
+            } else {
+                a[i] -= 111;
+            }
+        }
+    }
+
+    out <-- a[0] + a[1];
+}
+
+component main = InnerConditional8(4);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %0, align 4
+//CHECK-NEXT:   br label %loop2
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop2:
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %2, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %loop.body, label %loop.end
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %4, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end:
+//CHECK-NEXT:   br label %store9
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 999)
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %9, i256 111)
+//CHECK-NEXT:   %10 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %10, align 4
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %12, i256 1)
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %13, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: store9:
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %15 = load i256, i256* %14, align 4
+//CHECK-NEXT:   %call.fr_add2 = call i256 @fr_add(i256 %15, i256 1)
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add2, i256* %16, align 4
+//CHECK-NEXT:   br label %return10
+//CHECK-EMPTY: 
+//CHECK-NEXT: return10:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional8_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [7 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %6 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   br label %unrolled_loop7
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop7:
+//CHECK-NEXT:   %7 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
+//CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 1
+//CHECK-NEXT:   %12 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %12, i32 0, i256 1
+//CHECK-NEXT:   %14 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %9, i256* %11, i256* %13, i256* %15)
+//CHECK-NEXT:   %16 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %17 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %17, i32 0, i256 2
+//CHECK-NEXT:   %19 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %19, i32 0, i256 2
+//CHECK-NEXT:   %21 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %21, i32 0, i256 2
+//CHECK-NEXT:   %23 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %16, [0 x i256]* %0, i256* %18, i256* %20, i256* %22, i256* %24)
+//CHECK-NEXT:   %25 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %26 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %26, i32 0, i256 3
+//CHECK-NEXT:   %28 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0, i256 3
+//CHECK-NEXT:   %30 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %30, i32 0, i256 3
+//CHECK-NEXT:   %32 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %32, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %25, [0 x i256]* %0, i256* %27, i256* %29, i256* %31, i256* %33)
+//CHECK-NEXT:   %34 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %35 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %35, i32 0, i256 4
+//CHECK-NEXT:   %37 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %37, i32 0, i256 4
+//CHECK-NEXT:   %39 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %39, i32 0, i256 4
+//CHECK-NEXT:   %41 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %42 = getelementptr [0 x i256], [0 x i256]* %41, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %34, [0 x i256]* %0, i256* %36, i256* %38, i256* %40, i256* %42)
+//CHECK-NEXT:   br label %store8
+//CHECK-EMPTY: 
+//CHECK-NEXT: store8:
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 3552, i256* %43, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_conditional_9.circom b/circom/tests/loops/inner_conditional_9.circom
new file mode 100644
index 000000000..345e26d0c
--- /dev/null
+++ b/circom/tests/loops/inner_conditional_9.circom
@@ -0,0 +1,178 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template InnerConditional9(N) {
+    signal output out;
+
+    var a[N];
+    for (var i = 0; i < N; i++) {
+        if (i > 1) {
+            // runs when i∈{2,3}
+            for (var j = 0; j < N; j++) {
+                a[i] += 999;
+            }
+        } else {
+            // runs when i∈{0,1}
+            for (var j = 0; j < N; j++) {
+                a[i] -= 999;
+            }
+        }
+    }
+
+    out <-- a[0] + a[1];
+}
+
+component main = InnerConditional9(4);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %branch1
+//CHECK-EMPTY: 
+//CHECK-NEXT: branch1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_gt = call i1 @fr_gt(i256 %1, i256 1)
+//CHECK-NEXT:   br i1 %call.fr_gt, label %if.then, label %if.else
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.then:
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.else:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %loop.cond2
+//CHECK-EMPTY: 
+//CHECK-NEXT: if.merge:
+//CHECK-NEXT:   br label %store11
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %5, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %loop.body, label %loop.end
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body:
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %7, i256 999)
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %11, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end:
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond2:
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %13 = load i256, i256* %12, align 4
+//CHECK-NEXT:   %call.fr_lt5 = call i1 @fr_lt(i256 %13, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt5, label %loop.body3, label %loop.end4
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body3:
+//CHECK-NEXT:   %14 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %15 = load i256, i256* %14, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %15, i256 999)
+//CHECK-NEXT:   %16 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_sub, i256* %16, align 4
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %call.fr_add6 = call i256 @fr_add(i256 %18, i256 1)
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add6, i256* %19, align 4
+//CHECK-NEXT:   br label %loop.cond2
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end4:
+//CHECK-NEXT:   br label %if.merge
+//CHECK-EMPTY: 
+//CHECK-NEXT: store11:
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %21 = load i256, i256* %20, align 4
+//CHECK-NEXT:   %call.fr_add7 = call i256 @fr_add(i256 %21, i256 1)
+//CHECK-NEXT:   %22 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add7, i256* %22, align 4
+//CHECK-NEXT:   br label %return12
+//CHECK-EMPTY: 
+//CHECK-NEXT: return12:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerConditional9_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [7 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 4, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %4 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %4, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %5 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %6 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   br label %unrolled_loop7
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop7:
+//CHECK-NEXT:   %7 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i256 1
+//CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %9, i256* %11, i256* null, i256* null)
+//CHECK-NEXT:   %12 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %13, i32 0, i256 2
+//CHECK-NEXT:   %15 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %15, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %12, [0 x i256]* %0, i256* %14, i256* %16, i256* null, i256* null)
+//CHECK-NEXT:   %17 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i256 3
+//CHECK-NEXT:   %20 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %20, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %17, [0 x i256]* %0, i256* null, i256* null, i256* %19, i256* %21)
+//CHECK-NEXT:   %22 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %23 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %23, i32 0, i256 4
+//CHECK-NEXT:   %25 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %26 = getelementptr [0 x i256], [0 x i256]* %25, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %22, [0 x i256]* %0, i256* null, i256* null, i256* %24, i256* %26)
+//CHECK-NEXT:   br label %store8
+//CHECK-EMPTY: 
+//CHECK-NEXT: store8:
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 21888242871839275222246405745257275088548364400416034343698204186575808487625, i256* %27, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loop_simple.circom b/circom/tests/loops/inner_loop_simple.circom
index a9c14299c..d1e0b00c8 100644
--- a/circom/tests/loops/inner_loop_simple.circom
+++ b/circom/tests/loops/inner_loop_simple.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n, m) {
     signal input in[m];
@@ -17,61 +17,104 @@ template InnerLoops(n, m) {
 
 component main = InnerLoops(2, 3);
 
-//signal_arena = { out, in[0], in[1], in[2] }
-//lvars = { n, m, b[0], b[1], i, j }
-
-//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//CHECK: unrolled_loop{{[0-9]+}}:
-//CHECK-NEXT:   %[[T06:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[T06]], align 4
-//CHECK-NEXT:   %[[T07:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 1
-//CHECK-NEXT:   %[[T08:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T07]], align 4
-//CHECK-NEXT:   %[[T09:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T08]], i256* %{{.*}}[[T09]], align 4
-//CHECK-NEXT:   %[[T10:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T10]], align 4
-//CHECK-NEXT:   %[[T11:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 2
-//CHECK-NEXT:   %[[T12:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T11]], align 4
-//CHECK-NEXT:   %[[T13:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T12]], i256* %{{.*}}[[T13]], align 4
-//CHECK-NEXT:   %[[T14:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T14]], align 4
-//CHECK-NEXT:   %[[T15:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 3
-//CHECK-NEXT:   %[[T16:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T15]], align 4
-//CHECK-NEXT:   %[[T17:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T16]], i256* %{{.*}}[[T17]], align 4
-//CHECK-NEXT:   %[[T18:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 3, i256* %{{.*}}[[T18]], align 4
-//CHECK-NEXT:   %[[T19:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T19]], align 4
-//CHECK-NEXT:   %[[T20:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[T20]], align 4
-//CHECK-NEXT:   %[[T21:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 1
-//CHECK-NEXT:   %[[T22:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T21]], align 4
-//CHECK-NEXT:   %[[T23:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 %{{.*}}[[T22]], i256* %{{.*}}[[T23]], align 4
-//CHECK-NEXT:   %[[T24:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T24]], align 4
-//CHECK-NEXT:   %[[T25:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 2
-//CHECK-NEXT:   %[[T26:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T25]], align 4
-//CHECK-NEXT:   %[[T27:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 %{{.*}}[[T26]], i256* %{{.*}}[[T27]], align 4
-//CHECK-NEXT:   %[[T28:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T28]], align 4
-//CHECK-NEXT:   %[[T29:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 3
-//CHECK-NEXT:   %[[T30:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T29]], align 4
-//CHECK-NEXT:   %[[T31:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 %{{.*}}[[T30]], i256* %{{.*}}[[T31]], align 4
-//CHECK-NEXT:   %[[T32:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
-//CHECK-NEXT:   store i256 3, i256* %{{.*}}[[T32]], align 4
-//CHECK-NEXT:   %[[T33:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T33]], align 4
-//CHECK-NEXT:   br label %store[[LBL:[0-9]+]]
-//CHECK-EMPTY:
-//CHECK-NEXT: store{{.*}}[[LBL]]:
-//CHECK-NEXT:   %[[T34:[[:alnum:]_.]+]] = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %[[T35:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T34]], align 4
-//CHECK-NEXT:   %[[T36:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 0
-//CHECK-NEXT:   store i256 %{{.*}}[[T35]], i256* %{{.*}}[[T36]], align 4
-//CHECK:   }
+// %0 (i.e. signal arena) = { out, in[0], in[1], in[2] }
+// %lvars = { n, m, b[0], b[1], i, j }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 2
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %3, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 2
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %3, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %6 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   %7 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0, i256* %8)
+//CHECK-NEXT:   %9 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0, i256* %10)
+//CHECK-NEXT:   %11 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %11, [0 x i256]* %0, i256* %12)
+//CHECK-NEXT:   %13 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %13, align 4
+//CHECK-NEXT:   %14 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 0, i256* %14, align 4
+//CHECK-NEXT:   %15 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %15, [0 x i256]* %0, i256* %16)
+//CHECK-NEXT:   %17 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %17, [0 x i256]* %0, i256* %18)
+//CHECK-NEXT:   %19 = bitcast [6 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %19, [0 x i256]* %0, i256* %20)
+//CHECK-NEXT:   %21 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %21, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %22 = getelementptr [6 x i256], [6 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %23 = load i256, i256* %22, align 4
+//CHECK-NEXT:   %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %23, i256* %24, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops.circom b/circom/tests/loops/inner_loops.circom
index 7205dfb5f..c2cef8c42 100644
--- a/circom/tests/loops/inner_loops.circom
+++ b/circom/tests/loops/inner_loops.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n) {
     signal input a[n];
@@ -8,6 +8,13 @@ template InnerLoops(n) {
 
     for (var i = 0; i < n; i++) {
         for (var j = 0; j <= i; j++) {
+            // NOTE: When processing the outer loop, the following statement is determined NOT
+            //  safe to move into a new function since it uses 'j' which is unknown. That results
+            //  in the outer loop unrolling without extracting the body to a new function. Then
+            //  the two copies of the inner loop are processed and their bodies are extracted to
+            //  new functions and replaced with calls to those functions before unrolling. So
+            //  it ends up creating two slightly different functions for this innermost body,
+            //  one for each iteration of the outer loop (i.e. when i=0 and when i=1).
             b[i] += a[i - j];
         }
     }
@@ -15,67 +22,112 @@ template InnerLoops(n) {
 
 component main = InnerLoops(2);
 //
-//ARG = { a[0], a[1] }
-//lvars = { n, b[0], b[1], i, j }
+// %0 (i.e. signal arena) = { a[0], a[1] }
+// %lvars = { n, b[0], b[1], i, j }
+//
 //unrolled code:
-//	b[0] = b[0] + a[0 - 0 = 0];
-//	b[1] = b[1] + a[1 - 0 = 1];
-//	b[1] = b[1] + a[1 - 1 = 0];
+//	b[0] = b[0] + a[0 - 0 = 0];     //extracted function 1
+//	b[1] = b[1] + a[1 - 0 = 1];     //extracted function 2
+//	b[1] = b[1] + a[1 - 1 = 0];     //extracted function 2
 //
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %3)
+//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
+//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 1
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr3
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %7, i256 %9)
+//CHECK-NEXT:   %call.fr_cast_to_addr4 = call i32 @fr_cast_to_addr(i256 %call.fr_sub)
+//CHECK-NEXT:   %mul_addr5 = mul i32 1, %call.fr_cast_to_addr4
+//CHECK-NEXT:   %add_addr6 = add i32 %mul_addr5, 0
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 %add_addr6
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 %11)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add7 = call i256 @fr_add(i256 %14, i256 1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add7, i256* %15, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+// 
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %3)
+//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
+//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 1
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr3
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %5, i256 %7)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add4 = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %call.fr_add4, i256* %11, align 4
+//CHECK-NEXT:   br label %return3
+//CHECK-EMPTY: 
+//CHECK-NEXT: return3:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
 //
-//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run
-//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//// Use the block labels to check that the loop is unrolled and check the unrolled body
-//CHECK-NOT: loop.cond{{.*}}:
-//CHECK-NOT: loop.body{{.*}}:
-//CHECK-NOT: loop.end{{.*}}:
-//CHECK:      unrolled_loop{{.*}}:
-//				// j = 0
-//CHECK-NEXT:   %[[T01:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[T01]], align 4
-//				// b[0] = b[0] + a[0]
-//CHECK-NEXT:   %[[T02:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   %[[T03:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T02]], align 4
-//CHECK-NEXT:   %[[T04:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 0
-//CHECK-NEXT:   %[[T05:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T04]], align 4
-//CHECK-NEXT:   %[[T06:[[:alnum:]_.]+]] = call i256 @fr_add(i256 %{{.*}}[[T03]], i256 %{{.*}}[[T05]])
-//CHECK-NEXT:   %[[T07:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:   store i256 %{{.*}}[[T06]], i256* %{{.*}}[[T07]], align 4
-//				// j = 1
-//CHECK-NEXT:   %[[T08:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T08]], align 4
-//				// i = 1
-//CHECK-NEXT:   %[[T09:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T09]], align 4
-//				// j = 0
-//CHECK-NEXT:   %[[T10:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 0, i256* %{{.*}}[[T10]], align 4
-//				// b[1] = b[1] + a[1]
-//CHECK-NEXT:   %[[T11:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %[[T12:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T11]], align 4
-//CHECK-NEXT:   %[[T13:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 1
-//CHECK-NEXT:   %[[T14:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T13]], align 4
-//CHECK-NEXT:   %[[T15:[[:alnum:]_.]+]] = call i256 @fr_add(i256 %{{.*}}[[T12]], i256 %{{.*}}[[T14]])
-//CHECK-NEXT:   %[[T16:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T15]], i256* %{{.*}}[[T16]], align 4
-//				// j = 1
-//CHECK-NEXT:   %[[T17:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 1, i256* %{{.*}}[[T17]], align 4
-//				// b[1] = b[1] + a[0]
-//CHECK-NEXT:   %[[T18:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   %[[T19:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T18]], align 4
-//CHECK-NEXT:   %[[T20:[[:alnum:]_.]+]] = getelementptr [0 x i256], [0 x i256]* %{{.*}}[[ARG]], i32 0, i32 0
-//CHECK-NEXT:   %[[T21:[[:alnum:]_.]+]] = load i256, i256* %{{.*}}[[T20]], align 4
-//CHECK-NEXT:   %[[T22:[[:alnum:]_.]+]] = call i256 @fr_add(i256 %{{.*}}[[T19]], i256 %{{.*}}[[T21]])
-//CHECK-NEXT:   %[[T23:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:   store i256 %{{.*}}[[T22]], i256* %{{.*}}[[T23]], align 4
-//				// j = 2
-//CHECK-NEXT:   %[[T24:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T24]], align 4
-//				// i = 2
-//CHECK-NEXT:   %[[T25:[[:alnum:]_.]+]] = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:   store i256 2, i256* %{{.*}}[[T25]], align 4
-//CHECK-NOT: loop.cond{{.*}}:
-//CHECK-NOT: loop.body{{.*}}:
-//CHECK-NOT: loop.end{{.*}}:
-//CHECK:   }
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %5, align 4
+//CHECK-NEXT:   %6 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 1, i256* %7, align 4
+//CHECK-NEXT:   %8 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 0, i256* %8, align 4
+//CHECK-NEXT:   %9 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %9, [0 x i256]* %0, i256* %10)
+//CHECK-NEXT:   %11 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %11, [0 x i256]* %0, i256* %12)
+//CHECK-NEXT:   %13 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 2, i256* %13, align 4
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/inner_loops2.circom b/circom/tests/loops/inner_loops2.circom
index fe405a82f..df0775c1d 100644
--- a/circom/tests/loops/inner_loops2.circom
+++ b/circom/tests/loops/inner_loops2.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n) {
     signal input a[n];
@@ -33,3 +33,229 @@ template InnerLoops(n) {
 }
 
 component main = InnerLoops(5);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+// 
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+// 
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_4]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_5]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %9 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %9, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %10 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 1, i256* %10, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %11 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %11, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %12 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   %14 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %14, [0 x i256]* %0, i256* %15)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %16 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 2, i256* %16, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %17 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %17, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %18 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %19 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %18, [0 x i256]* %0, i256* %19)
+//CHECK-NEXT:   %20 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %20, [0 x i256]* %0, i256* %21)
+//CHECK-NEXT:   %22 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %22, [0 x i256]* %0, i256* %23)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %24 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 3, i256* %24, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %25 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %25, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %26 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %26, [0 x i256]* %0, i256* %27)
+//CHECK-NEXT:   %28 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %28, [0 x i256]* %0, i256* %29)
+//CHECK-NEXT:   %30 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %31 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %30, [0 x i256]* %0, i256* %31)
+//CHECK-NEXT:   %32 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %32, [0 x i256]* %0, i256* %33)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %34 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 4, i256* %34, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %35 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 7
+//CHECK-NEXT:   store i256 0, i256* %35, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %36 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %37 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %36, [0 x i256]* %0, i256* %37)
+//CHECK-NEXT:   %38 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %39 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %38, [0 x i256]* %0, i256* %39)
+//CHECK-NEXT:   %40 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %41 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %40, [0 x i256]* %0, i256* %41)
+//CHECK-NEXT:   %42 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %43 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %42, [0 x i256]* %0, i256* %43)
+//CHECK-NEXT:   %44 = bitcast [8 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %45 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %44, [0 x i256]* %0, i256* %45)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %46 = getelementptr [8 x i256], [8 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 5, i256* %46, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops3.circom b/circom/tests/loops/inner_loops3.circom
index 9c193f329..558d94961 100644
--- a/circom/tests/loops/inner_loops3.circom
+++ b/circom/tests/loops/inner_loops3.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n) {
     signal input a[n];
@@ -24,3 +24,205 @@ template InnerLoops(n) {
 }
 
 component main = InnerLoops(5);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 1, i256* %3, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_3:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_3]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_4:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_4]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_5:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_5]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 5
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %8 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %8, [0 x i256]* %0)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %9 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %9, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT:  unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %10 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %14 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %14, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %15 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %15, [0 x i256]* %0, i256* %16)
+//CHECK-NEXT:   %17 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %17, [0 x i256]* %0, i256* %18)
+//CHECK-NEXT:   %19 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_3]]([0 x i256]* %19, [0 x i256]* %0, i256* %20)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %21 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %21, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %22 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %22, [0 x i256]* %0, i256* %23)
+//CHECK-NEXT:   %24 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %25 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %24, [0 x i256]* %0, i256* %25)
+//CHECK-NEXT:   %26 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %26, [0 x i256]* %0, i256* %27)
+//CHECK-NEXT:   %28 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %29 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_4]]([0 x i256]* %28, [0 x i256]* %0, i256* %29)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %30 = getelementptr [7 x i256], [7 x i256]* %lvars, i32 0, i32 6
+//CHECK-NEXT:   store i256 0, i256* %30, align 4
+//CHECK-NEXT:   br label %unrolled_loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %31 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %32 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %31, [0 x i256]* %0, i256* %32)
+//CHECK-NEXT:   %33 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %33, [0 x i256]* %0, i256* %34)
+//CHECK-NEXT:   %35 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %36 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %35, [0 x i256]* %0, i256* %36)
+//CHECK-NEXT:   %37 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %38 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %37, [0 x i256]* %0, i256* %38)
+//CHECK-NEXT:   %39 = bitcast [7 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %40 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//DELETE:   %40 = bitcast i256* %40 to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_5]]([0 x i256]* %39, [0 x i256]* %0, i256* %40)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops4.circom b/circom/tests/loops/inner_loops4.circom
index a85aa859b..6c33e35a4 100644
--- a/circom/tests/loops/inner_loops4.circom
+++ b/circom/tests/loops/inner_loops4.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template InnerLoops(n) {
     signal input a[n];
@@ -8,9 +8,115 @@ template InnerLoops(n) {
     var j;
     for (var i = 0; i < n; i++) {
         for (j = 0; j <= i; j++) {
+            // NOTE: When processing the outer loop, the following statement is determined NOT
+            //  safe to move into a new function since it uses 'j' which is unknown. That results
+            //  in the outer loop unrolling without extracting the body to a new function. Then
+            //  the two copies of the inner loop are processed and their bodies are extracted to
+            //  new functions and replaced with calls to those functions before unrolling. So it
+            //  ends up creating two slightly different functions for this innermost body, one
+            //  for each iteration of the outer loop (i.e. when i=0 and when i=1). This result
+            //  is logically correct but not optimal in terms of code size.
             b[i] = a[i - j];
         }
     }
 }
 
 component main = InnerLoops(2);
+
+// %0 (i.e. signal arena) = { a[0], a[1] }
+// %lvars = { n, b[0], b[1], j, i }
+//
+//Fully unrolled:
+//  b[0] = a[0];
+//  b[1] = a[1];
+//  b[1] = a[0];
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %3, i256 %5)
+//CHECK-NEXT:   %call.fr_cast_to_addr1 = call i32 @fr_cast_to_addr(i256 %call.fr_sub)
+//CHECK-NEXT:   %mul_addr2 = mul i32 1, %call.fr_cast_to_addr1
+//CHECK-NEXT:   %add_addr3 = add i32 %mul_addr2, 0
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 %add_addr3
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %7, i256* %8, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %11, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %call.fr_cast_to_addr = call i32 @fr_cast_to_addr(i256 %1)
+//CHECK-NEXT:   %mul_addr = mul i32 1, %call.fr_cast_to_addr
+//CHECK-NEXT:   %add_addr = add i32 %mul_addr, 1
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 %add_addr
+//CHECK-NEXT:   store i256 %3, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @InnerLoops_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %6 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %6, align 4
+//CHECK-NEXT:   %7 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   %8 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 1, i256* %8, align 4
+//CHECK-NEXT:   %9 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %9, align 4
+//CHECK-NEXT:   %10 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [5 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   %14 = getelementptr [5 x i256], [5 x i256]* %lvars, i32 0, i32 4
+//CHECK-NEXT:   store i256 2, i256* %14, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops5.circom b/circom/tests/loops/inner_loops5.circom
new file mode 100644
index 000000000..8cee6ce78
--- /dev/null
+++ b/circom/tests/loops/inner_loops5.circom
@@ -0,0 +1,89 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// %0 (i.e. signal arena)  = [ out, in ]
+// %lvars =  [ n, temp, i, j ]
+// %subcmps = []
+template Num2Bits(n) {
+    signal input in;
+    signal output out;
+
+	var temp = 0;
+    for (var i = 0; i < n; i++) {
+    	for (var j = 0; j < n; j++) {
+        	temp += (in >> j) & 1;
+        }
+    }
+}
+
+component main = Num2Bits(4);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 0, i256* %0, align 4
+//CHECK-NEXT:   br label %loop{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop{{[0-9]+}}:
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.cond:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_lt = call i1 @fr_lt(i256 %2, i256 4)
+//CHECK-NEXT:   br i1 %call.fr_lt, label %loop.body, label %loop.end
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.body:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %6, i256 %8)
+//CHECK-NEXT:   %call.fr_bit_and = call i256 @fr_bit_and(i256 %call.fr_shr, i256 1)
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 %call.fr_bit_and)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add1 = call i256 @fr_add(i256 %11, i256 1)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add1, i256* %12, align 4
+//CHECK-NEXT:   br label %loop.cond
+//CHECK-EMPTY: 
+//CHECK-NEXT: loop.end:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add2 = call i256 @fr_add(i256 %14, i256 1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add2, i256* %15, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %4 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0)
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %5, [0 x i256]* %0)
+//CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0)
+//CHECK-NEXT:   %7 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %7, [0 x i256]* %0)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/inner_loops6.circom b/circom/tests/loops/inner_loops6.circom
new file mode 100644
index 000000000..ee885c37f
--- /dev/null
+++ b/circom/tests/loops/inner_loops6.circom
@@ -0,0 +1,112 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// %0 (i.e. signal arena)  = [ out[0], out[1], out[2], out[3], in ]
+// %lvars =  [ n, i, j ]
+// %subcmps = []
+template Num2Bits(n) {
+    signal input in;
+    signal output out[n*n];
+
+    for (var i = 0; i < n; i++) {
+    	for (var j = 0; j < n; j++) {
+            // NOTE: When processing the outer loop, the following statement is determined NOT
+            //  safe to move into a new function since it uses 'j' which is unknown. That results
+            //  in the outer loop unrolling without extracting the body to a new function. Then
+            //  the two copies of the inner loop are processed and their bodies are extracted to
+            //  new functions and replaced with calls to those functions before unrolling. So it
+            //  ends up creating two different functions for this innermost body, one for each
+            //  iteration of the outer loop (i.e. when i=0 and when i=1). In this case, those 2
+            //  functions are identical. This is logically correct but not optimal in code size.
+        	out[i*n + j] <-- in;
+        }
+    }
+}
+
+component main = Num2Bits(2);
+//
+// %0 (i.e. signal arena) = { out[0], out[1], out[2], out[3], in }
+// %lvars = { n, i, j }
+//
+//unrolled code:
+//	out[0] = in;
+//	out[1] = in;
+//	out[2] = in;
+//	out[3] = in;
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+// 
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_2:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_2]]:
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 4
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %4, i256 1)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %3, align 4
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %4, [0 x i256]* %0, i256* %5)
+//CHECK-NEXT:   %6 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %6, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %8, align 4
+//CHECK-NEXT:   %9 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %9, align 4
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %10, [0 x i256]* %0, i256* %11)
+//CHECK-NEXT:   %12 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_2]]([0 x i256]* %12, [0 x i256]* %0, i256* %13)
+//CHECK-NEXT:   %14 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 2, i256* %14, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/known_function.circom b/circom/tests/loops/known_function.circom
index 65b837850..d88d7b790 100644
--- a/circom/tests/loops/known_function.circom
+++ b/circom/tests/loops/known_function.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 function funWithLoop(n) {
 	var acc = 0;
diff --git a/circom/tests/loops/known_signal_value.circom b/circom/tests/loops/known_signal_value.circom
index 6038f1785..881e15fa8 100644
--- a/circom/tests/loops/known_signal_value.circom
+++ b/circom/tests/loops/known_signal_value.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template accumulate() {
     signal input i;
diff --git a/circom/tests/loops/simple_variant_idx.circom b/circom/tests/loops/simple_variant_idx.circom
new file mode 100644
index 000000000..b72bbc2fe
--- /dev/null
+++ b/circom/tests/loops/simple_variant_idx.circom
@@ -0,0 +1,69 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template SimpleVariantIdx(n) {
+    signal input in;
+    signal output out[n];
+
+	var lc;
+    for (var i = 0; i < n; i++) {
+        out[i] <-- in;	//StoreBucket
+        lc = out[i];	//StoreBucket
+        //i++			//StoreBucket
+    }
+}
+
+component main = SimpleVariantIdx(3);
+
+//NOTE: For indexing dependent on the loop variable, the pointer reference must be
+//	computed outside of the body function call. All else can be done inside.
+//
+// %0 (i.e. signal arena) = [ out[0], out[1], out[2], in ]
+// %lvars =  [ n, lc, i ]
+// %subcmps = []
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]]){{.*}} {
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 3
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY:
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %3 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %4, i256* %5, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY:
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %7, i256 1)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %8, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY:
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SimpleVariantIdx_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %4, [0 x i256]* %0, i256* %5, i256* %6)
+//CHECK-NEXT:   %7 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %7, [0 x i256]* %0, i256* %8, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12)
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/unknown_index_from_array.circom b/circom/tests/loops/unknown_index_from_array.circom
new file mode 100644
index 000000000..a1f033191
--- /dev/null
+++ b/circom/tests/loops/unknown_index_from_array.circom
@@ -0,0 +1,26 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Example(n) {
+    signal input a[n];
+    signal input b[n];
+    signal output c[n];
+
+    for(var i = 0; i < n; i++) {
+        c[i] <-- a[b[2]];
+    }
+}
+
+component main = Example(3);
+
+// %0 (i.e. signal arena) = { c[0], c[1], c[2], a[0], a[1], a[2], b[0], b[1], b[2] }
+// %lvars = { n, i }
+//
+//CHECK-LABEL: define void @Example_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK-NOT: call void @..generated..loop.body.{{.*}}
+//
+//NOTE: Current implementation of loop body extraction does not move this loop body to
+//  a new function because the index of 'a' is unknown (i.e. loaded from signal 'b'). 
diff --git a/circom/tests/loops/unknown_index_from_function.circom b/circom/tests/loops/unknown_index_from_function.circom
new file mode 100644
index 000000000..7f45c66ef
--- /dev/null
+++ b/circom/tests/loops/unknown_index_from_function.circom
@@ -0,0 +1,35 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+function identity(n) {
+    return n;
+}
+
+template Example(n) {
+    signal input a[n];
+    signal input b;
+    signal output c[n];
+    
+    for(var i = 0; i < n; i++) {
+        c[i] <-- a[identity(b)];
+        //Circom AST splits this into 2 nodes:
+        //	CALL: lvars[2] = identity(b)
+        //	STORE: c[i] = a[lvars[2]]
+        //Then the loop variable increment is the 3rd statement
+        //	STORE: i = i + 1
+    }
+}
+
+component main = Example(3);
+
+// %0 (i.e. signal arena) = { c[0], c[1], c[2], a[0], a[1], a[2], b }
+// %lvars = { n, i, <identity_result> }
+//
+//CHECK-LABEL: define void @Example_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK-NOT: call void @..generated..loop.body.{{.*}}
+//
+//NOTE: Current implementation of loop body extraction does not move this loop body to
+//  a new function because the index of 'a' is unknown (i.e. function return value). 
diff --git a/circom/tests/loops/unknown_local_array_index.circom b/circom/tests/loops/unknown_local_array_index.circom
index 77b690851..2cdcf38be 100644
--- a/circom/tests/loops/unknown_local_array_index.circom
+++ b/circom/tests/loops/unknown_local_array_index.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ForUnknownIndex() {
     signal input in;
diff --git a/circom/tests/loops/unknown_loop_component.circom b/circom/tests/loops/unknown_loop_component.circom
index e20626b59..53ffd97af 100644
--- a/circom/tests/loops/unknown_loop_component.circom
+++ b/circom/tests/loops/unknown_loop_component.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template nbits() {
     signal input in;
diff --git a/circom/tests/loops/unknown_loop_index.circom b/circom/tests/loops/unknown_loop_index.circom
index b9656b80f..9b75d7e9d 100644
--- a/circom/tests/loops/unknown_loop_index.circom
+++ b/circom/tests/loops/unknown_loop_index.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template Num2Bits(n) {
     signal input in;
diff --git a/circom/tests/loops/unknown_loop_oob.circom b/circom/tests/loops/unknown_loop_oob.circom
index d167fb514..6b8c03676 100644
--- a/circom/tests/loops/unknown_loop_oob.circom
+++ b/circom/tests/loops/unknown_loop_oob.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template accumulate() {
     signal input i;
diff --git a/circom/tests/loops/vanguard-uc-comp.circom b/circom/tests/loops/vanguard-uc-comp.circom
index 25152ee2a..b67e6c122 100644
--- a/circom/tests/loops/vanguard-uc-comp.circom
+++ b/circom/tests/loops/vanguard-uc-comp.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template Num2Bits(n) {
     signal input in;
@@ -20,74 +20,98 @@ template Num2Bits(n) {
 
 component main = Num2Bits(2);
 
-// %arena (i.e. %0 param) = [ out[0], out[1], in ]
+// %0 (i.e. signal arena) = [ out[0], out[1], in ]
 // %lvars =  [ n, lc1, e2, i ]
 // %subcmps = []
 //
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]], i256* %fix_[[X4:[0-9]+]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
+//CHECK-NEXT:   %call.fr_bit_and = call i256 @fr_bit_and(i256 %call.fr_shr, i256 1)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %call.fr_bit_and, i256* %4, align 4
+//CHECK-NEXT:   br label %assert2
+//CHECK-EMPTY: 
+//CHECK-NEXT: assert2:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_sub = call i256 @fr_sub(i256 %8, i256 1)
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %6, i256 %call.fr_sub)
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %call.fr_mul, i256 0)
+//CHECK-NEXT:   call void @__assert(i1 %call.fr_eq)
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_value(i1 %call.fr_eq, i1* %constraint)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %11 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_mul1 = call i256 @fr_mul(i256 %12, i256 %14)
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 %call.fr_mul1)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %16 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %17 = load i256, i256* %16, align 4
+//CHECK-NEXT:   %18 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %19 = load i256, i256* %18, align 4
+//CHECK-NEXT:   %call.fr_add2 = call i256 @fr_add(i256 %17, i256 %19)
+//CHECK-NEXT:   %20 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add2, i256* %20, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %21 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %22 = load i256, i256* %21, align 4
+//CHECK-NEXT:   %call.fr_add3 = call i256 @fr_add(i256 %22, i256 1)
+//CHECK-NEXT:   %23 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add3, i256* %23, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
 //CHECK-LABEL: define void @Num2Bits_{{[0-9]+}}_run
 //CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
-//CHECK: unrolled_loop{{[0-9]+}}:
-//CHECK-NEXT:  %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
-//CHECK-NEXT:  %6 = load i256, i256* %5, align 4
-//CHECK-NEXT:  %7 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:  %8 = load i256, i256* %7, align 4
-//CHECK-NEXT:  %call.fr_shr = call i256 @fr_shr(i256 %6, i256 %8)
-//CHECK-NEXT:  %call.fr_bit_and = call i256 @fr_bit_and(i256 %call.fr_shr, i256 1)
-//CHECK-NEXT:  %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:  store i256 %call.fr_bit_and, i256* %9, align 4
-//CHECK-NEXT:  %10 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:  %11 = load i256, i256* %10, align 4
-//CHECK-NEXT:  %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:  %13 = load i256, i256* %12, align 4
-//CHECK-NEXT:  %call.fr_sub = call i256 @fr_sub(i256 %13, i256 1)
-//CHECK-NEXT:  %call.fr_mul = call i256 @fr_mul(i256 %11, i256 %call.fr_sub)
-//CHECK-NEXT:  %call.fr_eq = call i1 @fr_eq(i256 %call.fr_mul, i256 0)
-//CHECK-NEXT:  call void @__assert(i1 %call.fr_eq)
-//CHECK-NEXT:  %constraint = alloca i1, align 1
-//CHECK-NEXT:  call void @__constraint_value(i1 %call.fr_eq, i1* %constraint)
-//CHECK-NEXT:  %14 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:  %15 = load i256, i256* %14, align 4
-//CHECK-NEXT:  %16 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
-//CHECK-NEXT:  %17 = load i256, i256* %16, align 4
-//CHECK-NEXT:  %18 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:  %19 = load i256, i256* %18, align 4
-//CHECK-NEXT:  %call.fr_mul1 = call i256 @fr_mul(i256 %17, i256 %19)
-//CHECK-NEXT:  %call.fr_add = call i256 @fr_add(i256 %15, i256 %call.fr_mul1)
-//CHECK-NEXT:  %20 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:  store i256 %call.fr_add, i256* %20, align 4
-//CHECK-NEXT:  %21 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:  store i256 2, i256* %21, align 4
-//CHECK-NEXT:  %22 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:  store i256 1, i256* %22, align 4
-//CHECK-NEXT:  %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
-//CHECK-NEXT:  %24 = load i256, i256* %23, align 4
-//CHECK-NEXT:  %25 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:  %26 = load i256, i256* %25, align 4
-//CHECK-NEXT:  %call.fr_shr2 = call i256 @fr_shr(i256 %24, i256 %26)
-//CHECK-NEXT:  %call.fr_bit_and3 = call i256 @fr_bit_and(i256 %call.fr_shr2, i256 1)
-//CHECK-NEXT:  %27 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-//CHECK-NEXT:  store i256 %call.fr_bit_and3, i256* %27, align 4
-//CHECK-NEXT:  %28 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-//CHECK-NEXT:  %29 = load i256, i256* %28, align 4
-//CHECK-NEXT:  %30 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-//CHECK-NEXT:  %31 = load i256, i256* %30, align 4
-//CHECK-NEXT:  %call.fr_sub4 = call i256 @fr_sub(i256 %31, i256 1)
-//CHECK-NEXT:  %call.fr_mul5 = call i256 @fr_mul(i256 %29, i256 %call.fr_sub4)
-//CHECK-NEXT:  %call.fr_eq6 = call i1 @fr_eq(i256 %call.fr_mul5, i256 0)
-//CHECK-NEXT:  call void @__assert(i1 %call.fr_eq6)
-//CHECK-NEXT:  %constraint7 = alloca i1, align 1
-//CHECK-NEXT:  call void @__constraint_value(i1 %call.fr_eq6, i1* %constraint7)
-//CHECK-NEXT:  %32 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:  %33 = load i256, i256* %32, align 4
-//CHECK-NEXT:  %34 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
-//CHECK-NEXT:  %35 = load i256, i256* %34, align 4
-//CHECK-NEXT:  %36 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:  %37 = load i256, i256* %36, align 4
-//CHECK-NEXT:  %call.fr_mul8 = call i256 @fr_mul(i256 %35, i256 %37)
-//CHECK-NEXT:  %call.fr_add9 = call i256 @fr_add(i256 %33, i256 %call.fr_mul8)
-//CHECK-NEXT:  %38 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
-//CHECK-NEXT:  store i256 %call.fr_add9, i256* %38, align 4
-//CHECK-NEXT:  %39 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
-//CHECK-NEXT:  store i256 4, i256* %39, align 4
-//CHECK-NEXT:  %40 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 3
-//CHECK-NEXT:  store i256 2, i256* %40, align 4
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %6, i256* %7, i256* %8, i256* %9)
+//CHECK-NEXT:   %10 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %14 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %10, [0 x i256]* %0, i256* %11, i256* %12, i256* %13, i256* %14)
+//CHECK-NEXT:   br label %assert{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: assert{{[0-9]+}}:
+//CHECK-NEXT:   %15 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %16 = load i256, i256* %15, align 4
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %call.fr_eq = call i1 @fr_eq(i256 %16, i256 %18)
+//CHECK-NEXT:   call void @__assert(i1 %call.fr_eq)
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_value(i1 %call.fr_eq, i1* %constraint)
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/variant_idx_in_loop_A.circom b/circom/tests/loops/variant_idx_in_loop_A.circom
new file mode 100644
index 000000000..b35250f72
--- /dev/null
+++ b/circom/tests/loops/variant_idx_in_loop_A.circom
@@ -0,0 +1,53 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template VariantIndex(n) {
+    signal input in;
+    signal output out[n];
+
+    for (var i = 0; i<n; i++) {
+        out[i] <-- (in >> i);
+    }
+}
+
+component main = VariantIndex(2);
+
+// %0 (i.e. signal arena) = [ out[0], out[1], in ]
+// %lvars =  [ n, i ]
+// %subcmps = []
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 2
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_shr, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @VariantIndex_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %3 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %3, [0 x i256]* %0, i256* %4)
+//CHECK-NEXT:   %5 = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %6)
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/loops/variant_idx_in_loop_B.circom b/circom/tests/loops/variant_idx_in_loop_B.circom
new file mode 100644
index 000000000..b85223b1d
--- /dev/null
+++ b/circom/tests/loops/variant_idx_in_loop_B.circom
@@ -0,0 +1,71 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template VariantIndex(n) {
+    signal input in;
+    signal output out;
+
+    var temp[n];
+    for (var i = 0; i<n; i++) {
+        temp[i] = (in >> i);
+    }
+    out <-- temp[0] + temp[1];
+}
+
+component main = VariantIndex(2);
+
+// %0 (i.e. signal arena) = [ out, in ]
+// %lvars =  [ n, temp[0], temp[1], i ]
+// %subcmps = []
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %fix_0){{.*}} {
+//CHECK:      store{{[0-9]+}}:
+//CHECK-NEXT:   %0 = getelementptr [0 x i256], [0 x i256]* %signals, i32 0, i32 1
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %call.fr_shr = call i256 @fr_shr(i256 %1, i256 %3)
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_shr, i256* %4, align 4
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %6, i256 1)
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 3
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %7, align 4
+//CHECK-NEXT:   br label %return{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: return{{[0-9]+}}:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @VariantIndex_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK:      unrolled_loop{{[0-9]+}}:
+//CHECK-NEXT:   %5 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %6 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %6, i32 0, i256 1
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %5, [0 x i256]* %0, i256* %7)
+//CHECK-NEXT:   %8 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %9 = bitcast [4 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %9, i32 0, i256 2
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %8, [0 x i256]* %0, i256* %10)
+//CHECK-NEXT:   br label %store{{[0-9]+}}
+//CHECK-EMPTY: 
+//CHECK-NEXT: store{{[0-9]+}}:
+//CHECK-NEXT:   %11 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %13 = getelementptr [4 x i256], [4 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %14 = load i256, i256* %13, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %12, i256 %14)
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %15, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/loops/variant_idx_in_loop_C.circom b/circom/tests/loops/variant_idx_in_loop_C.circom
new file mode 100644
index 000000000..f3313bc95
--- /dev/null
+++ b/circom/tests/loops/variant_idx_in_loop_C.circom
@@ -0,0 +1,29 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template VariantIndex(n) {
+    signal input in;
+    signal output out[n*n];
+
+    //Cannot move loop body to a new function. The index for 'out' is computed within
+    //  the loop body, so a pointer to out[x] obtained at the call site of the new
+    //  function and passed as a parameter would point to the wrong memory location
+    //  because it would be computed from the old value of 'x'.
+    var x = 1;
+    for (var i = 0; i<n; i++) {
+        x = x + i;
+        out[x] <-- (in >> i);
+    }
+}
+
+component main = VariantIndex(2);
+
+// %0 (i.e. signal arena) = [ out[0], out[1], out[2], out[3], in ]
+// %lvars =  [ n, x, i ]
+// %subcmps = []
+//
+//CHECK-LABEL: define void @VariantIndex_{{[0-9]+}}_run
+//CHECK-SAME: ([0 x i256]* %[[ARG:[0-9]+]])
+//CHECK: unrolled_loop{{[0-9]+}}:
+//CHECK-NOT: call void @..generated..loop.body.{{.*}}
diff --git a/circom/tests/subcmps/conv_map2idx_A.circom b/circom/tests/subcmps/conv_map2idx_A.circom
index 3c4c0afc4..4ae5fc574 100644
--- a/circom/tests/subcmps/conv_map2idx_A.circom
+++ b/circom/tests/subcmps/conv_map2idx_A.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.3;
 
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template GetWeight(A) {
     signal input inp;
@@ -18,3 +18,113 @@ template ComputeValue() {
 }
 
 component main = ComputeValue();
+
+//CHECK-LABEL: define void @GetWeight_0_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [1 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 1, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [1 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [1 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 0, i256* %1, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_1_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [1 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 1, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [1 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [1 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 1, i256* %1, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @ComputeValue_2_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [0 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 0, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   store [0 x i256]* %1, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @ComputeValue_2_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @GetWeight_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @GetWeight_1_build({ [0 x i256]*, i32 }* %2)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %4 = load [0 x i256]*, [0 x i256]** %3, align 8
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %4, i32 0, i32 0
+//CHECK-NEXT:   store i256 888, i256* %5, align 4
+//CHECK-NEXT:   %6 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %load.subcmp.counter = load i32, i32* %6, align 4
+//CHECK-NEXT:   %decrement.counter = sub i32 %load.subcmp.counter, 1
+//CHECK-NEXT:   store i32 %decrement.counter, i32* %6, align 4
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   call void @GetWeight_1_run([0 x i256]* %8)
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %9 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %10 = load [0 x i256]*, [0 x i256]** %9, align 8
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %10, i32 0, i32 0
+//CHECK-NEXT:   store i256 999, i256* %11, align 4
+//CHECK-NEXT:   %12 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %load.subcmp.counter1 = load i32, i32* %12, align 4
+//CHECK-NEXT:   %decrement.counter2 = sub i32 %load.subcmp.counter1, 1
+//CHECK-NEXT:   store i32 %decrement.counter2, i32* %12, align 4
+//CHECK-NEXT:   %13 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %14 = load [0 x i256]*, [0 x i256]** %13, align 8
+//CHECK-NEXT:   call void @GetWeight_1_run([0 x i256]* %14)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/conv_map2idx_B.circom b/circom/tests/subcmps/conv_map2idx_B.circom
index 76f067882..7fe243854 100644
--- a/circom/tests/subcmps/conv_map2idx_B.circom
+++ b/circom/tests/subcmps/conv_map2idx_B.circom
@@ -1,7 +1,7 @@
 pragma circom 2.0.3;
 
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template GetWeight(A, B) {
     signal output x;    //signal index 0
@@ -21,3 +21,136 @@ template ComputeValue() {
 }
 
 component main = ComputeValue();
+
+//CHECK-LABEL: define void @GetWeight_0_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [3 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 0, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_0_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 999, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   store i256 999, i256* %3, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_1_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [3 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 0, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [3 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @GetWeight_1_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 888, i256* %1, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %2 = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 1, i256* %2, align 4
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   store i256 888, i256* %3, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @ComputeValue_2_build({ [0 x i256]*, i32 }* %0){{.*}} {
+//CHECK-NEXT: main:
+//CHECK-NEXT:   %1 = alloca [2 x i256], align 8
+//CHECK-NEXT:   %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+//CHECK-NEXT:   store i32 0, i32* %2, align 4
+//CHECK-NEXT:   %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+//CHECK-NEXT:   %4 = bitcast [2 x i256]* %1 to [0 x i256]*
+//CHECK-NEXT:   store [0 x i256]* %4, [0 x i256]** %3, align 8
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @ComputeValue_2_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %create_cmp1
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp1:
+//CHECK-NEXT:   %1 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @GetWeight_0_build({ [0 x i256]*, i32 }* %1)
+//CHECK-NEXT:   %2 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %3 = load [0 x i256]*, [0 x i256]** %2, align 8
+//CHECK-NEXT:   call void @GetWeight_0_run([0 x i256]* %3)
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %4 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @GetWeight_1_build({ [0 x i256]*, i32 }* %4)
+//CHECK-NEXT:   %5 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %6 = load [0 x i256]*, [0 x i256]** %5, align 8
+//CHECK-NEXT:   call void @GetWeight_1_run([0 x i256]* %6)
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %7 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 2
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %10, i256* %11, align 4
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %10, i256 %12, i1* %constraint)
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %13 = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %14 = load [0 x i256]*, [0 x i256]** %13, align 8
+//CHECK-NEXT:   %15 = getelementptr [0 x i256], [0 x i256]* %14, i32 0, i32 2
+//CHECK-NEXT:   %16 = load i256, i256* %15, align 4
+//CHECK-NEXT:   %17 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   store i256 %16, i256* %17, align 4
+//CHECK-NEXT:   %18 = load i256, i256* %17, align 4
+//CHECK-NEXT:   %constraint1 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %16, i256 %18, i1* %constraint1)
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/mapped.circom b/circom/tests/subcmps/mapped.circom
index f98d272e3..69c44a397 100644
--- a/circom/tests/subcmps/mapped.circom
+++ b/circom/tests/subcmps/mapped.circom
@@ -1,7 +1,9 @@
 pragma circom 2.0.0;
 
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*
+// TODO: Expected to fail for now; the failure appears to be related to both https://veridise.atlassian.net/browse/VAN-582 and https://veridise.atlassian.net/browse/VAN-670
 
 template A(n) {
 	signal input a[n];
@@ -38,4 +40,4 @@ template B(n) {
 	}
 }
 
-component main = B(2);
\ No newline at end of file
+component main = B(2);
diff --git a/circom/tests/subcmps/mapped2.circom b/circom/tests/subcmps/mapped2.circom
index 0299af906..41b3d479c 100644
--- a/circom/tests/subcmps/mapped2.circom
+++ b/circom/tests/subcmps/mapped2.circom
@@ -1,7 +1,8 @@
 pragma circom 2.0.0;
 
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template A(n) {
 	signal input a[n];
diff --git a/circom/tests/subcmps/mapped3.circom b/circom/tests/subcmps/mapped3.circom
index 61b330457..6393b7fdc 100644
--- a/circom/tests/subcmps/mapped3.circom
+++ b/circom/tests/subcmps/mapped3.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template ArrayOp(q) {
     signal input inp[15];
diff --git a/circom/tests/subcmps/mapped4.circom b/circom/tests/subcmps/mapped4.circom
index 279ca8834..7312cbc80 100644
--- a/circom/tests/subcmps/mapped4.circom
+++ b/circom/tests/subcmps/mapped4.circom
@@ -1,6 +1,6 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template MatrixOp(q) {
     signal input inp[5][3];
diff --git a/circom/tests/subcmps/subcmps0A.circom b/circom/tests/subcmps/subcmps0A.circom
new file mode 100644
index 000000000..4f33801b3
--- /dev/null
+++ b/circom/tests/subcmps/subcmps0A.circom
@@ -0,0 +1,144 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// Like SubCmps1 but simpler (no constraints and fewer operations)
+template IsZero() {                 //NOTE: despite the name, this only negates `in`; there is no zero test
+    signal input in;                //signal index 1
+    signal output out;              //signal index 0
+    out <-- -in;                    //load(signal 1) + fr_neg + store(signal 0); no constraint call is expected
+}
+
+template SubCmps0A(n) {             //routes each ins[i] through its own IsZero subcomponent into outs[i]
+    signal input ins[n];            //signal indices 2 and 3 when n = 2
+    signal output outs[n];          //signal indices 0 and 1 when n = 2
+    
+    component zeros[n];             //one IsZero subcomponent per array element
+    for (var i = 0; i < n; i++) {   //expected to be unrolled into one call per iteration of a single extracted body function
+        zeros[i] = IsZero();
+        zeros[i].in <-- ins[i];     //load(fix)+store(subcmp)
+        outs[i] <-- zeros[i].out;   //load(subcmp)+store(fix)
+                                    //increment iteration variable
+    }
+}
+
+component main = SubCmps0A(2);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %8, i256 1)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @IsZero_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %2)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %3, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps0A_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %[[T01:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %[[T01]], align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %[[T02:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T02]])
+//CHECK-NEXT:   %[[T03:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T03]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %[[T04:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %[[T04]], align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %[[T05:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T06:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T07:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T06]], align 8
+//CHECK-NEXT:   %[[T08:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T07]], i32 0
+//CHECK-NEXT:   %[[T09:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T08]], i32 0, i256 1
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T13:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T12]], align 8
+//CHECK-NEXT:   %[[T14:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T13]], i32 0
+//CHECK-NEXT:   %[[T15:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T14]], i32 0, i256 0
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T17:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T16]], align 8
+//CHECK-NEXT:   %[[T18:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T17]], i32 0
+//CHECK-NEXT:   %[[T19:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T20:[0-9]+]] = bitcast i32* %[[T19]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T05]], [0 x i256]* %0, i256* %[[T09]], i256* %[[T10]], i256* %[[T11]], i256* %[[T15]], [0 x i256]* %[[T18]], i256* %[[T20]])
+//CHECK-NEXT:   %[[T21:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T22:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T23:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T22]], align 8
+//CHECK-NEXT:   %[[T24:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T23]], i32 0
+//CHECK-NEXT:   %[[T25:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T24]], i32 0, i256 1
+//CHECK-NEXT:   %[[T26:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T27:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T28:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T29:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T28]], align 8
+//CHECK-NEXT:   %[[T30:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T29]], i32 0
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T30]], i32 0, i256 0
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T33:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T32]], align 8
+//CHECK-NEXT:   %[[T34:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T33]], i32 0
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T36:[0-9]+]] = bitcast i32* %[[T35]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T21]], [0 x i256]* %0, i256* %[[T25]], i256* %[[T26]], i256* %[[T27]], i256* %[[T31]], [0 x i256]* %[[T34]], i256* %[[T36]])
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps0B.circom b/circom/tests/subcmps/subcmps0B.circom
new file mode 100644
index 000000000..f858f60e8
--- /dev/null
+++ b/circom/tests/subcmps/subcmps0B.circom
@@ -0,0 +1,165 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+// Like SubCmps1 but simpler (no constraints and fewer operations); unlike SubCmps0A, the loop also copies zeros[i].out into the local var `temp`
+template IsZero() {                 //NOTE: despite the name, this only negates `in`; there is no zero test
+    signal input in;                //signal index 1
+    signal output out;              //signal index 0
+    out <-- -in;                    //load(signal 1) + fr_neg + store(signal 0); no constraint call is expected
+}
+
+template SubCmps0B(n) {             //same as SubCmps0A plus a read of the subcomponent output into a local var
+    signal input ins[n];            //signal indices 2 and 3 when n = 2
+    signal output outs[n];          //signal indices 0 and 1 when n = 2
+    var temp;                       //local var index 1 (n is index 0, the loop counter i is index 2)
+    component zeros[n];             //one IsZero subcomponent per array element
+    for (var i = 0; i < n; i++) {   //expected to be unrolled into one call per iteration of a single extracted body function
+        zeros[i] = IsZero();
+        zeros[i].in <-- ins[i];     //load(fix)+store(subcmp)
+        outs[i] <-- zeros[i].out;   //load(subcmp)+store(fix)
+        temp = zeros[i].out;        //load(subcmp)+store(local var index 1)
+                                    //increment iteration variable
+    }
+}
+
+component main = SubCmps0B(2);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %fix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %subfix_[[X4:[0-9]+]], i256* %subfix_[[X5:[0-9]+]], [0 x i256]* %sub_[[X5]], i256* %subc_[[X5]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X5]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X5]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %subfix_[[X5]], i32 0
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %8, i256* %9, align 4
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %10 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   %11 = load i256, i256* %10, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %11, i256 1)
+//CHECK-NEXT:   %12 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %12, align 4
+//CHECK-NEXT:   br label %return6
+//CHECK-EMPTY: 
+//CHECK-NEXT: return6:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @IsZero_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %2)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %3, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps0B_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [3 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %[[T01:[0-9]+]] = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %[[T01]], align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %[[T02:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T02]])
+//CHECK-NEXT:   %[[T03:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T03]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %[[T04:[0-9]+]] = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %[[T04]], align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %[[T05:[0-9]+]] = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+//CHECK-NEXT:   store i256 0, i256* %[[T05]], align 4
+//CHECK-NEXT:   br label %unrolled_loop5
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop5:
+//CHECK-NEXT:   %[[T06:[0-9]+]] = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T07:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T08:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T07]], align 8
+//CHECK-NEXT:   %[[T09:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T08]], i32 0
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T09]], i32 0, i256 1
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T13:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T14:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T13]], align 8
+//CHECK-NEXT:   %[[T15:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T14]], i32 0
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T15]], i32 0, i256 0
+//CHECK-NEXT:   %[[T17:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T18:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T17]], align 8
+//CHECK-NEXT:   %[[T19:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T18]], i32 0
+//CHECK-NEXT:   %[[T20:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T19]], i32 0, i256 0
+//CHECK-NEXT:   %[[T21:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T22:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T21]], align 8
+//CHECK-NEXT:   %[[T23:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T22]], i32 0
+//CHECK-NEXT:   %[[T24:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T25:[0-9]+]] = bitcast i32* %[[T24]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T06]], [0 x i256]* %0, i256* %[[T10]], i256* %[[T11]], i256* %[[T12]], i256* %[[T16]], i256* %[[T20]], [0 x i256]* %[[T23]], i256* %[[T25]])
+//CHECK-NEXT:   %[[T26:[0-9]+]] = bitcast [3 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T27:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T28:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T27]], align 8
+//CHECK-NEXT:   %[[T29:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T28]], i32 0
+//CHECK-NEXT:   %[[T30:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T29]], i32 0, i256 1
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T33:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T34:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T33]], align 8
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T34]], i32 0
+//CHECK-NEXT:   %[[T36:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T35]], i32 0, i256 0
+//CHECK-NEXT:   %[[T37:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T38:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T37]], align 8
+//CHECK-NEXT:   %[[T39:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T38]], i32 0
+//CHECK-NEXT:   %[[T40:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T39]], i32 0, i256 0
+//CHECK-NEXT:   %[[T41:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T42:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T41]], align 8
+//CHECK-NEXT:   %[[T43:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T42]], i32 0
+//CHECK-NEXT:   %[[T44:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T45:[0-9]+]] = bitcast i32* %[[T44]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T26]], [0 x i256]* %0, i256* %[[T30]], i256* %[[T31]], i256* %[[T32]], i256* %[[T36]], i256* %[[T40]], [0 x i256]* %[[T43]], i256* %[[T45]])
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps0C.circom b/circom/tests/subcmps/subcmps0C.circom
new file mode 100644
index 000000000..c62cdff92
--- /dev/null
+++ b/circom/tests/subcmps/subcmps0C.circom
@@ -0,0 +1,153 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template IsZero() {                 //NOTE: despite the name, this computes out = (-in) * (-in); there is no zero test
+    signal input in;                //signal index 1
+    signal output out;              //signal index 0
+    signal temp <-- -in;            //signal index 2; load(signal 1) + fr_neg + store(signal 2)
+    out <-- temp * temp;            //two loads of signal 2 + fr_mul + store(signal 0)
+}
+
+template SubCmps0C(n) {             //routes each ins[i] through its own IsZero subcomponent into outs[i]
+    signal input ins[n];            //ins[0] is signal index 2 when n = 2
+    signal output outs[n];          //outs[0] is signal index 0 when n = 2
+
+    component zeros[n];             //one IsZero subcomponent per array element
+    for (var i = 0; i < n; i++) {   //expected to be unrolled into calls to a single extracted body function
+        zeros[i] = IsZero();
+        zeros[i].in <-- ins[i];     //load(fix)+store(subcmp)
+        outs[i] <-- zeros[i].out;   //load(subcmp)+store(fix)
+    }
+}
+
+component main = SubCmps0C(2);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %8, i256 1)
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %9, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @IsZero_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %call.fr_neg = call i256 @fr_neg(i256 %2)
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   store i256 %call.fr_neg, i256* %3, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %7 = load i256, i256* %6, align 4
+//CHECK-NEXT:   %call.fr_mul = call i256 @fr_mul(i256 %5, i256 %7)
+//CHECK-NEXT:   %8 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_mul, i256* %8, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps0C_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [2 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %[[T01:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 2, i256* %[[T01]], align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %[[T02:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T02]])
+//CHECK-NEXT:   %[[T03:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @IsZero_0_build({ [0 x i256]*, i32 }* %[[T03]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %[[T04:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %[[T04]], align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %[[T05:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T06:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T07:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T06]], align 8
+//CHECK-NEXT:   %[[T08:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T07]], i32 0
+//CHECK-NEXT:   %[[T09:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T08]], i32 0, i256 1
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T13:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T12]], align 8
+//CHECK-NEXT:   %[[T14:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T13]], i32 0
+//CHECK-NEXT:   %[[T15:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T14]], i32 0, i256 0
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T17:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T16]], align 8
+//CHECK-NEXT:   %[[T18:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T17]], i32 0
+//CHECK-NEXT:   %[[T19:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T20:[0-9]+]] = bitcast i32* %[[T19]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T05]], [0 x i256]* %0, i256* %[[T09]], i256* %[[T10]], i256* %[[T11]], i256* %[[T15]], [0 x i256]* %[[T18]], i256* %[[T20]])
+//CHECK-NEXT:   %[[T21:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T22:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T23:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T22]], align 8
+//CHECK-NEXT:   %[[T24:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T23]], i32 0
+//CHECK-NEXT:   %[[T25:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T24]], i32 0, i256 1
+//CHECK-NEXT:   %[[T26:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T27:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T28:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T29:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T28]], align 8
+//CHECK-NEXT:   %[[T30:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T29]], i32 0
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T30]], i32 0, i256 0
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T33:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T32]], align 8
+//CHECK-NEXT:   %[[T34:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T33]], i32 0
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [2 x { [0 x i256]*, i32 }], [2 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T36:[0-9]+]] = bitcast i32* %[[T35]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T21]], [0 x i256]* %0, i256* %[[T25]], i256* %[[T26]], i256* %[[T27]], i256* %[[T31]], [0 x i256]* %[[T34]], i256* %[[T36]])
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps0D.circom b/circom/tests/subcmps/subcmps0D.circom
new file mode 100644
index 000000000..c2eeee5f5
--- /dev/null
+++ b/circom/tests/subcmps/subcmps0D.circom
@@ -0,0 +1,192 @@
+pragma circom 2.0.0;
+// REQUIRES: circom
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+
+template Add() {
+    signal input in1;       // subcmp signal 1
+    signal input in2;       // subcmp signal 2
+    signal output out;      // subcmp signal 0
+    out <-- in1 + in2;      // expected IR: fr_add of signals 1 and 2, stored to signal 0
+}
+
+template SubCmps0D(n) {
+    signal input ins[n];        // for n = 3: signals 3..5 of this template's arena (%0)
+    signal output outs[n];      // for n = 3: signals 0..2 of this template's arena (%0)
+
+    component a[n];             // %subcmps; every a[i] is built up-front in create_cmp2
+    for (var i = 0; i < n; i++) {   // %lvars = [ n, i ]; body is extracted into one function, called per iteration
+        a[i] = Add();
+        a[i].in1 <-- ins[i];
+        a[i].in2 <-- ins[i];    // expected IR calls @Add_0_run after each input store
+        outs[i] <-- a[i].out;
+    }
+}
+
+component main = SubCmps0D(3);
+
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID_1:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]], i256* %subfix_[[X3:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X4:[0-9]+]], i256* %fix_[[X5:[0-9]+]], i256* %subfix_[[X6:[0-9]+]], [0 x i256]* %sub_[[X6]], i256* %subc_[[X6]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID_1]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %4 = getelementptr i256, i256* %fix_[[X4]], i32 0
+//CHECK-NEXT:   %5 = load i256, i256* %4, align 4
+//CHECK-NEXT:   %6 = getelementptr i256, i256* %subfix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %5, i256* %6, align 4
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %7 = getelementptr [0 x i256], [0 x i256]* %sub_[[X6]], i32 0
+//CHECK-NEXT:   call void @Add_0_run([0 x i256]* %sub_[[X6]])
+//CHECK-NEXT:   br label %store5
+//CHECK-EMPTY: 
+//CHECK-NEXT: store5:
+//CHECK-NEXT:   %8 = getelementptr i256, i256* %subfix_[[X6]], i32 0
+//CHECK-NEXT:   %9 = load i256, i256* %8, align 4
+//CHECK-NEXT:   %10 = getelementptr i256, i256* %fix_[[X5]], i32 0
+//CHECK-NEXT:   store i256 %9, i256* %10, align 4
+//CHECK-NEXT:   br label %store6
+//CHECK-EMPTY: 
+//CHECK-NEXT: store6:
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %12 = load i256, i256* %11, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %12, i256 1)
+//CHECK-NEXT:   %13 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %13, align 4
+//CHECK-NEXT:   br label %return7
+//CHECK-EMPTY: 
+//CHECK-NEXT: return7:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @Add_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [0 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %1 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+//CHECK-NEXT:   %2 = load i256, i256* %1, align 4
+//CHECK-NEXT:   %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+//CHECK-NEXT:   %4 = load i256, i256* %3, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %2, i256 %4)
+//CHECK-NEXT:   %5 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %5, align 4
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps0D_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK-NEXT: prelude:
+//CHECK-NEXT:   %lvars = alloca [2 x i256], align 8
+//CHECK-NEXT:   %subcmps = alloca [3 x { [0 x i256]*, i32 }], align 8
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %[[T01:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 0
+//CHECK-NEXT:   store i256 3, i256* %[[T01]], align 4
+//CHECK-NEXT:   br label %create_cmp2
+//CHECK-EMPTY: 
+//CHECK-NEXT: create_cmp2:
+//CHECK-NEXT:   %[[T02:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %[[T02]])
+//CHECK-NEXT:   %[[T03:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %[[T03]])
+//CHECK-NEXT:   %[[T04:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2
+//CHECK-NEXT:   call void @Add_0_build({ [0 x i256]*, i32 }* %[[T04]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %[[T05:[0-9]+]] = getelementptr [2 x i256], [2 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 0, i256* %[[T05]], align 4
+//CHECK-NEXT:   br label %unrolled_loop4
+//CHECK-EMPTY: 
+//CHECK-NEXT: unrolled_loop4:
+//CHECK-NEXT:   %[[T06:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T07:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T08:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T07]], align 8
+//CHECK-NEXT:   %[[T09:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T08]], i32 0
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T09]], i32 0, i256 1
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T13:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T12]], align 8
+//CHECK-NEXT:   %[[T14:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T13]], i32 0
+//CHECK-NEXT:   %[[T15:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T14]], i32 0, i256 2
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T17:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T18:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T19:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T18]], align 8
+//CHECK-NEXT:   %[[T20:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T19]], i32 0
+//CHECK-NEXT:   %[[T21:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T20]], i32 0, i256 0
+//CHECK-NEXT:   %[[T22:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T23:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T22]], align 8
+//CHECK-NEXT:   %[[T24:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T23]], i32 0
+//CHECK-NEXT:   %[[T25:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T26:[0-9]+]] = bitcast i32* %[[T25]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T06]], [0 x i256]* %0, i256* %[[T10]], i256* %[[T11]], i256* %[[T15]], i256* %[[T16]], i256* %[[T17]], i256* %[[T21]], [0 x i256]* %[[T24]], i256* %[[T26]])
+//CHECK-NEXT:   %[[T27:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T28:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T29:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T28]], align 8
+//CHECK-NEXT:   %[[T30:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T29]], i32 0
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T30]], i32 0, i256 1
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %[[T33:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T34:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T33]], align 8
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T34]], i32 0
+//CHECK-NEXT:   %[[T36:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T35]], i32 0, i256 2
+//CHECK-NEXT:   %[[T37:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %[[T38:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T39:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T40:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T39]], align 8
+//CHECK-NEXT:   %[[T41:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T40]], i32 0
+//CHECK-NEXT:   %[[T42:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T41]], i32 0, i256 0
+//CHECK-NEXT:   %[[T43:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T44:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T43]], align 8
+//CHECK-NEXT:   %[[T45:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T44]], i32 0
+//CHECK-NEXT:   %[[T46:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T47:[0-9]+]] = bitcast i32* %[[T46]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T27]], [0 x i256]* %0, i256* %[[T31]], i256* %[[T32]], i256* %[[T36]], i256* %[[T37]], i256* %[[T38]], i256* %[[T42]], [0 x i256]* %[[T45]], i256* %[[T47]])
+//CHECK-NEXT:   %[[T48:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T49:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T50:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T49]], align 8
+//CHECK-NEXT:   %[[T51:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T50]], i32 0
+//CHECK-NEXT:   %[[T52:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T51]], i32 0, i256 1
+//CHECK-NEXT:   %[[T53:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %[[T54:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T55:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T54]], align 8
+//CHECK-NEXT:   %[[T56:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T55]], i32 0
+//CHECK-NEXT:   %[[T57:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T56]], i32 0, i256 2
+//CHECK-NEXT:   %[[T58:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %[[T59:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T60:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T61:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T60]], align 8
+//CHECK-NEXT:   %[[T62:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T61]], i32 0
+//CHECK-NEXT:   %[[T63:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T62]], i32 0, i256 0
+//CHECK-NEXT:   %[[T64:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T65:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T64]], align 8
+//CHECK-NEXT:   %[[T66:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T65]], i32 0
+//CHECK-NEXT:   %[[T67:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %[[T68:[0-9]+]] = bitcast i32* %[[T67]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID_1]]([0 x i256]* %[[T48]], [0 x i256]* %0, i256* %[[T52]], i256* %[[T53]], i256* %[[T57]], i256* %[[T58]], i256* %[[T59]], i256* %[[T63]], [0 x i256]* %[[T66]], i256* %[[T68]])
+//CHECK-NEXT:   br label %prologue
+//CHECK-EMPTY: 
+//CHECK-NEXT: prologue:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
diff --git a/circom/tests/subcmps/subcmps1.circom b/circom/tests/subcmps/subcmps1.circom
index b58b4bcf4..883d3d290 100644
--- a/circom/tests/subcmps/subcmps1.circom
+++ b/circom/tests/subcmps/subcmps1.circom
@@ -1,10 +1,10 @@
 pragma circom 2.0.0;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
 
 template IsZero() {
-    signal input in;
-    signal output out;
+    signal input in;        // subcmp signal 1
+    signal output out;      // subcmp signal 0
 
     signal inv;
 
@@ -28,4 +28,106 @@ template SubCmps1(n) {
     }
 }
 
-component main = SubCmps1(2);
\ No newline at end of file
+component main = SubCmps1(3);
+
+// %0 (i.e. signal arena) = [ outs[0], outs[1], outs[2], ins[0], ins[1], ins[2] ]
+// %lvars =  [ n, i ]
+// %subcmps = [ IsZero[0]{signals=[out,in,inv]}, IsZero[1]{SAME}, IsZero[2]{SAME} ]
+//
+//CHECK-LABEL: define void @..generated..loop.body.
+//CHECK-SAME: [[$F_ID:[0-9]+]]([0 x i256]* %lvars, [0 x i256]* %signals, i256* %subfix_[[X1:[0-9]+]], i256* %fix_[[X2:[0-9]+]],
+//CHECK-SAME: i256* %fix_[[X3:[0-9]+]], i256* %subfix_[[X4:[0-9]+]], [0 x i256]* %sub_[[X4]], i256* %subc_[[X4]]){{.*}} {
+//CHECK-NEXT: ..generated..loop.body.[[$F_ID]]:
+//CHECK-NEXT:   br label %store1
+//CHECK-EMPTY: 
+//CHECK-NEXT: store1:
+//CHECK-NEXT:   %0 = getelementptr i256, i256* %fix_[[X2]], i32 0
+//CHECK-NEXT:   %1 = load i256, i256* %0, align 4
+//CHECK-NEXT:   %2 = getelementptr i256, i256* %subfix_[[X1]], i32 0
+//CHECK-NEXT:   store i256 %1, i256* %2, align 4
+//CHECK-NEXT:   %3 = load i256, i256* %2, align 4
+//CHECK-NEXT:   %constraint = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %1, i256 %3, i1* %constraint)
+//CHECK-NEXT:   br label %store2
+//CHECK-EMPTY: 
+//CHECK-NEXT: store2:
+//CHECK-NEXT:   %4 = getelementptr [0 x i256], [0 x i256]* %sub_[[X4]], i32 0
+//CHECK-NEXT:   call void @IsZero_0_run([0 x i256]* %sub_[[X4]])
+//CHECK-NEXT:   br label %store3
+//CHECK-EMPTY: 
+//CHECK-NEXT: store3:
+//CHECK-NEXT:   %5 = getelementptr i256, i256* %subfix_[[X4]], i32 0
+//CHECK-NEXT:   %6 = load i256, i256* %5, align 4
+//CHECK-NEXT:   %7 = getelementptr i256, i256* %fix_[[X3]], i32 0
+//CHECK-NEXT:   store i256 %6, i256* %7, align 4
+//CHECK-NEXT:   %8 = load i256, i256* %7, align 4
+//CHECK-NEXT:   %constraint1 = alloca i1, align 1
+//CHECK-NEXT:   call void @__constraint_values(i256 %6, i256 %8, i1* %constraint1)
+//CHECK-NEXT:   br label %store4
+//CHECK-EMPTY: 
+//CHECK-NEXT: store4:
+//CHECK-NEXT:   %9 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   %10 = load i256, i256* %9, align 4
+//CHECK-NEXT:   %call.fr_add = call i256 @fr_add(i256 %10, i256 1)
+//CHECK-NEXT:   %11 = getelementptr [0 x i256], [0 x i256]* %lvars, i32 0, i32 1
+//CHECK-NEXT:   store i256 %call.fr_add, i256* %11, align 4
+//CHECK-NEXT:   br label %return5
+//CHECK-EMPTY: 
+//CHECK-NEXT: return5:
+//CHECK-NEXT:   ret void
+//CHECK-NEXT: }
+//
+//CHECK-LABEL: define void @SubCmps1_{{[0-9]+}}_run([0 x i256]* %0){{.*}} {
+//CHECK:      unrolled_loop5:
+//CHECK-NEXT:   %[[T07:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T08:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T09:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T08]], align 8
+//CHECK-NEXT:   %[[T10:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T09]], i32 0
+//CHECK-NEXT:   %[[T11:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T10]], i32 0, i256 1
+//CHECK-NEXT:   %[[T12:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 3
+//CHECK-NEXT:   %[[T13:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 0
+//CHECK-NEXT:   %[[T14:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T15:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T14]], align 8
+//CHECK-NEXT:   %[[T16:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T15]], i32 0
+//CHECK-NEXT:   %[[T17:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T16]], i32 0, i256 0
+//CHECK-NEXT:   %[[T18:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+//CHECK-NEXT:   %[[T19:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T18]], align 8
+//CHECK-NEXT:   %[[T20:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T19]], i32 0
+//CHECK-NEXT:   %[[T21:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+//CHECK-NEXT:   %[[T22:[0-9]+]] = bitcast i32* %[[T21]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %[[T07]], [0 x i256]* %0, i256* %[[T11]], i256* %[[T12]], i256* %[[T13]], i256* %[[T17]], [0 x i256]* %[[T20]], i256* %[[T22]])
+//CHECK-NEXT:   %[[T28:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T29:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T30:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T29]], align 8
+//CHECK-NEXT:   %[[T31:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T30]], i32 0
+//CHECK-NEXT:   %[[T32:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T31]], i32 0, i256 1
+//CHECK-NEXT:   %[[T33:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 4
+//CHECK-NEXT:   %[[T34:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 1
+//CHECK-NEXT:   %[[T35:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T36:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T35]], align 8
+//CHECK-NEXT:   %[[T37:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T36]], i32 0
+//CHECK-NEXT:   %[[T38:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T37]], i32 0, i256 0
+//CHECK-NEXT:   %[[T39:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 0
+//CHECK-NEXT:   %[[T40:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T39]], align 8
+//CHECK-NEXT:   %[[T41:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T40]], i32 0
+//CHECK-NEXT:   %[[T42:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 1, i32 1
+//CHECK-NEXT:   %[[T43:[0-9]+]] = bitcast i32* %[[T42]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %[[T28]], [0 x i256]* %0, i256* %[[T32]], i256* %[[T33]], i256* %[[T34]], i256* %[[T38]], [0 x i256]* %[[T41]], i256* %[[T43]])
+//CHECK-NEXT:   %[[T49:[0-9]+]] = bitcast [2 x i256]* %lvars to [0 x i256]*
+//CHECK-NEXT:   %[[T50:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T51:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T50]], align 8
+//CHECK-NEXT:   %[[T52:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T51]], i32 0
+//CHECK-NEXT:   %[[T53:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T52]], i32 0, i256 1
+//CHECK-NEXT:   %[[T54:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 5
+//CHECK-NEXT:   %[[T55:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i256 2
+//CHECK-NEXT:   %[[T56:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T57:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T56]], align 8
+//CHECK-NEXT:   %[[T58:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T57]], i32 0
+//CHECK-NEXT:   %[[T59:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T58]], i32 0, i256 0
+//CHECK-NEXT:   %[[T60:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 0
+//CHECK-NEXT:   %[[T61:[0-9]+]] = load [0 x i256]*, [0 x i256]** %[[T60]], align 8
+//CHECK-NEXT:   %[[T62:[0-9]+]] = getelementptr [0 x i256], [0 x i256]* %[[T61]], i32 0
+//CHECK-NEXT:   %[[T63:[0-9]+]] = getelementptr [3 x { [0 x i256]*, i32 }], [3 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 2, i32 1
+//CHECK-NEXT:   %[[T64:[0-9]+]] = bitcast i32* %[[T63]] to i256*
+//CHECK-NEXT:   call void @..generated..loop.body.[[$F_ID]]([0 x i256]* %[[T49]], [0 x i256]* %0, i256* %[[T53]], i256* %[[T54]], i256* %[[T55]], i256* %[[T59]], [0 x i256]* %[[T62]], i256* %[[T64]])
+//CHECK-NEXT:   br label %prologue
diff --git a/circom/tests/subcmps/subcmps2.circom b/circom/tests/subcmps/subcmps2.circom
index 6f1668e7e..23f932c8e 100644
--- a/circom/tests/subcmps/subcmps2.circom
+++ b/circom/tests/subcmps/subcmps2.circom
@@ -1,6 +1,7 @@
 pragma circom 2.0.6;
 // REQUIRES: circom
-// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s
+// RUN: rm -rf %t && mkdir %t && %circom --llvm -o %t %s | sed -n 's/.*Written successfully:.* \(.*\)/\1/p' | xargs cat | FileCheck %s --enable-var-scope
+// XFAIL:.*		// pending https://veridise.atlassian.net/browse/VAN-670
 
 template Sum(n) {
     signal input inp[n];
@@ -41,3 +42,219 @@ component main = Caller();
 //CHECK: %[[SUBCMP:.*]] = load [0 x i256]*, [0 x i256]** %[[SUBCMP_PTR]]
 //CHECK: %[[SUBCMP_INP:.*]] = getelementptr [0 x i256], [0 x i256]* %[[SUBCMP]], i32 0, i32 {{[1-4]}}
 //CHECK: store i256 %[[CALL_VAL]], i256* %[[SUBCMP_INP]]
+
+/*
+define void @Sum_0_build({ [0 x i256]*, i32 }* %0) !dbg !9 {
+main:
+  %1 = alloca [5 x i256], align 8
+  %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+  store i32 4, i32* %2, align 4
+  %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+  %4 = bitcast [5 x i256]* %1 to [0 x i256]*
+  store [0 x i256]* %4, [0 x i256]** %3, align 8
+  ret void
+}
+
+define void @Sum_0_run([0 x i256]* %0) !dbg !11 {
+prelude:
+  %lvars = alloca [3 x i256], align 8
+  %subcmps = alloca [0 x { [0 x i256]*, i32 }], align 8
+  br label %store1
+
+store1:
+  %1 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 0
+  store i256 4, i256* %1, align 4
+  br label %store2
+
+store2:
+  %2 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 0, i256* %2, align 4
+  br label %store3
+
+store3:
+  %3 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 0, i256* %3, align 4
+  br label %unrolled_loop4
+
+unrolled_loop4:
+  %4 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %5 = load i256, i256* %4, align 4
+  %6 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+  %7 = load i256, i256* %6, align 4
+  %call.fr_add = call i256 @fr_add(i256 %5, i256 %7)
+  %8 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 %call.fr_add, i256* %8, align 4
+  %9 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 1, i256* %9, align 4
+  %10 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %11 = load i256, i256* %10, align 4
+  %12 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+  %13 = load i256, i256* %12, align 4
+  %call.fr_add1 = call i256 @fr_add(i256 %11, i256 %13)
+  %14 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 %call.fr_add1, i256* %14, align 4
+  %15 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 2, i256* %15, align 4
+  %16 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %17 = load i256, i256* %16, align 4
+  %18 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 3
+  %19 = load i256, i256* %18, align 4
+  %call.fr_add2 = call i256 @fr_add(i256 %17, i256 %19)
+  %20 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 %call.fr_add2, i256* %20, align 4
+  %21 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 3, i256* %21, align 4
+  %22 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %23 = load i256, i256* %22, align 4
+  %24 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 4
+  %25 = load i256, i256* %24, align 4
+  %call.fr_add3 = call i256 @fr_add(i256 %23, i256 %25)
+  %26 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  store i256 %call.fr_add3, i256* %26, align 4
+  %27 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 2
+  store i256 4, i256* %27, align 4
+  br label %store5
+
+store5:
+  %28 = getelementptr [3 x i256], [3 x i256]* %lvars, i32 0, i32 1
+  %29 = load i256, i256* %28, align 4
+  %30 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+  store i256 %29, i256* %30, align 4
+  %31 = load i256, i256* %30, align 4
+  %constraint = alloca i1, align 1
+  call void @__constraint_values(i256 %29, i256 %31, i1* %constraint)
+  br label %prologue
+
+prologue:
+  ret void
+}
+
+define void @Caller_1_build({ [0 x i256]*, i32 }* %0) !dbg !18 {
+main:
+  %1 = alloca [5 x i256], align 8
+  %2 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 1
+  store i32 4, i32* %2, align 4
+  %3 = getelementptr { [0 x i256]*, i32 }, { [0 x i256]*, i32 }* %0, i32 0, i32 0
+  %4 = bitcast [5 x i256]* %1 to [0 x i256]*
+  store [0 x i256]* %4, [0 x i256]** %3, align 8
+  ret void
+}
+
+define void @Caller_1_run([0 x i256]* %0) !dbg !20 {
+prelude:
+  %lvars = alloca [1 x i256], align 8
+  %subcmps = alloca [1 x { [0 x i256]*, i32 }], align 8
+  br label %create_cmp1
+
+create_cmp1:
+  %1 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0
+  call void @Sum_0_build({ [0 x i256]*, i32 }* %1)
+  br label %store2
+
+store2:
+  %2 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 0, i256* %2, align 4
+  br label %unrolled_loop3
+
+unrolled_loop3:
+  %nop_0_arena = alloca [1 x i256], align 8
+  %3 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 1
+  %4 = load i256, i256* %3, align 4
+  %5 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena, i32 0, i32 0
+  store i256 %4, i256* %5, align 4
+  %6 = bitcast [1 x i256]* %nop_0_arena to i256*
+  %call.nop_0 = call i256 @nop_0(i256* %6)
+  %7 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %8 = load [0 x i256]*, [0 x i256]** %7, align 8
+  %9 = getelementptr [0 x i256], [0 x i256]* %8, i32 0, i32 1
+  store i256 %call.nop_0, i256* %9, align 4
+  %10 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+  %load.subcmp.counter = load i32, i32* %10, align 4
+  %decrement.counter = sub i32 %load.subcmp.counter, 1
+  store i32 %decrement.counter, i32* %10, align 4
+  %11 = load i256, i256* %9, align 4
+  %constraint = alloca i1, align 1
+  call void @__constraint_values(i256 %call.nop_0, i256 %11, i1* %constraint)
+  %12 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 1, i256* %12, align 4
+  %nop_0_arena1 = alloca [1 x i256], align 8
+  %13 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 2
+  %14 = load i256, i256* %13, align 4
+  %15 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena1, i32 0, i32 0
+  store i256 %14, i256* %15, align 4
+  %16 = bitcast [1 x i256]* %nop_0_arena1 to i256*
+  %call.nop_02 = call i256 @nop_0(i256* %16)
+  %17 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %18 = load [0 x i256]*, [0 x i256]** %17, align 8
+  %19 = getelementptr [0 x i256], [0 x i256]* %18, i32 0, i32 2
+  store i256 %call.nop_02, i256* %19, align 4
+  %20 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+  %load.subcmp.counter3 = load i32, i32* %20, align 4
+  %decrement.counter4 = sub i32 %load.subcmp.counter3, 1
+  store i32 %decrement.counter4, i32* %20, align 4
+  %21 = load i256, i256* %19, align 4
+  %constraint5 = alloca i1, align 1
+  call void @__constraint_values(i256 %call.nop_02, i256 %21, i1* %constraint5)
+  %22 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 2, i256* %22, align 4
+  %nop_0_arena6 = alloca [1 x i256], align 8
+  %23 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 3
+  %24 = load i256, i256* %23, align 4
+  %25 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena6, i32 0, i32 0
+  store i256 %24, i256* %25, align 4
+  %26 = bitcast [1 x i256]* %nop_0_arena6 to i256*
+  %call.nop_07 = call i256 @nop_0(i256* %26)
+  %27 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %28 = load [0 x i256]*, [0 x i256]** %27, align 8
+  %29 = getelementptr [0 x i256], [0 x i256]* %28, i32 0, i32 3
+  store i256 %call.nop_07, i256* %29, align 4
+  %30 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+  %load.subcmp.counter8 = load i32, i32* %30, align 4
+  %decrement.counter9 = sub i32 %load.subcmp.counter8, 1
+  store i32 %decrement.counter9, i32* %30, align 4
+  %31 = load i256, i256* %29, align 4
+  %constraint10 = alloca i1, align 1
+  call void @__constraint_values(i256 %call.nop_07, i256 %31, i1* %constraint10)
+  %32 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 3, i256* %32, align 4
+  %nop_0_arena11 = alloca [1 x i256], align 8
+  %33 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 4
+  %34 = load i256, i256* %33, align 4
+  %35 = getelementptr [1 x i256], [1 x i256]* %nop_0_arena11, i32 0, i32 0
+  store i256 %34, i256* %35, align 4
+  %36 = bitcast [1 x i256]* %nop_0_arena11 to i256*
+  %call.nop_012 = call i256 @nop_0(i256* %36)
+  %37 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %38 = load [0 x i256]*, [0 x i256]** %37, align 8
+  %39 = getelementptr [0 x i256], [0 x i256]* %38, i32 0, i32 4
+  store i256 %call.nop_012, i256* %39, align 4
+  %40 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 1
+  %load.subcmp.counter13 = load i32, i32* %40, align 4
+  %decrement.counter14 = sub i32 %load.subcmp.counter13, 1
+  store i32 %decrement.counter14, i32* %40, align 4
+  %41 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %42 = load [0 x i256]*, [0 x i256]** %41, align 8
+  call void @Sum_0_run([0 x i256]* %42)
+  %43 = load i256, i256* %39, align 4
+  %constraint15 = alloca i1, align 1
+  call void @__constraint_values(i256 %call.nop_012, i256 %43, i1* %constraint15)
+  %44 = getelementptr [1 x i256], [1 x i256]* %lvars, i32 0, i32 0
+  store i256 4, i256* %44, align 4
+  br label %store4
+
+store4:
+  %45 = getelementptr [1 x { [0 x i256]*, i32 }], [1 x { [0 x i256]*, i32 }]* %subcmps, i32 0, i32 0, i32 0
+  %46 = load [0 x i256]*, [0 x i256]** %45, align 8
+  %47 = getelementptr [0 x i256], [0 x i256]* %46, i32 0, i32 0
+  %48 = load i256, i256* %47, align 4
+  %49 = getelementptr [0 x i256], [0 x i256]* %0, i32 0, i32 0
+  store i256 %48, i256* %49, align 4
+  %50 = load i256, i256* %49, align 4
+  %constraint16 = alloca i1, align 1
+  call void @__constraint_values(i256 %48, i256 %50, i1* %constraint16)
+  br label %prologue
+
+prologue:
+  ret void
+}
+*/
diff --git a/circuit_passes/Cargo.toml b/circuit_passes/Cargo.toml
index e61d301f6..c0a9b2697 100644
--- a/circuit_passes/Cargo.toml
+++ b/circuit_passes/Cargo.toml
@@ -11,4 +11,6 @@ compiler = {path = "../compiler"}
 program_structure = {path = "../program_structure"}
 code_producers = {path = "../code_producers"}
 intervallum = "1.4.0"
-circom_algebra = {path = "../circom_algebra"}
\ No newline at end of file
+circom_algebra = {path = "../circom_algebra"}
+const_format = "0.2.31"
+indexmap = "2.0.0"
diff --git a/circuit_passes/src/bucket_interpreter/env.rs b/circuit_passes/src/bucket_interpreter/env.rs
deleted file mode 100644
index f94292b46..000000000
--- a/circuit_passes/src/bucket_interpreter/env.rs
+++ /dev/null
@@ -1,268 +0,0 @@
-use std::collections::HashMap;
-use std::fmt::{Display, Formatter};
-use compiler::circuit_design::function::FunctionCode;
-use compiler::circuit_design::template::TemplateCode;
-use crate::bucket_interpreter::BucketInterpreter;
-use crate::bucket_interpreter::value::{JoinSemiLattice, Value};
-
-pub type TemplatesLibrary = HashMap<String, TemplateCode>;
-pub type FunctionsLibrary = HashMap<String, FunctionCode>;
-
-pub trait ContextSwitcher {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &'a String,
-    ) -> BucketInterpreter<'a>;
-}
-
-impl<L: JoinSemiLattice + Clone> JoinSemiLattice for HashMap<usize, L> {
-    fn join(&self, other: &Self) -> Self {
-        let mut new: HashMap<usize, L> = Default::default();
-        for (k, v) in self {
-            new.insert(*k, v.clone());
-        }
-
-        for (k, v) in other {
-            if new.contains_key(&k) {
-                new.get_mut(&k).unwrap().join(v);
-            } else {
-                new.insert(*k, v.clone());
-            }
-        }
-        new
-    }
-}
-
-#[derive(Clone, Debug, Eq, PartialEq)]
-pub struct SubcmpEnv<'a> {
-    pub signals: HashMap<usize, Value>,
-    counter: usize,
-    name: &'a String,
-    template_id: usize,
-}
-
-impl JoinSemiLattice for SubcmpEnv<'_> {
-    fn join(&self, other: &Self) -> Self {
-        assert_eq!(self.name, other.name);
-        assert_eq!(self.template_id, other.template_id);
-        SubcmpEnv {
-            signals: self.signals.join(&other.signals),
-            counter: std::cmp::min(self.counter, other.counter),
-            name: self.name,
-            template_id: self.template_id,
-        }
-    }
-}
-
-impl<'a> SubcmpEnv<'a> {
-    pub fn new(inputs: usize, name: &'a String, template_id: usize) -> Self {
-        SubcmpEnv { signals: Default::default(), counter: inputs, name, template_id }
-    }
-
-    pub fn reset(self) -> Self {
-        let mut copy = self;
-        copy.signals.clear();
-        copy
-    }
-
-    pub fn get_signal(&self, index: usize) -> Value {
-        self.signals.get(&index).unwrap_or_default().clone()
-    }
-
-    pub fn set_signal(self, idx: usize, value: Value) -> SubcmpEnv<'a> {
-        let mut copy = self;
-        copy.signals.insert(idx, value);
-        copy
-    }
-
-    pub fn counter_is_zero(&self) -> bool {
-        self.counter == 0
-    }
-
-    pub fn decrease_counter(self) -> SubcmpEnv<'a> {
-        let mut copy = self;
-        copy.counter -= 1;
-        copy
-    }
-
-    pub fn counter_equal_to(&self, value: usize) -> bool {
-        self.counter == value
-    }
-}
-
-// An immutable env that returns a new copy when modified
-#[derive(Clone)]
-pub struct Env<'a> {
-    vars: HashMap<usize, Value>,
-    signals: HashMap<usize, Value>,
-    subcmps: HashMap<usize, SubcmpEnv<'a>>,
-    templates_library: &'a TemplatesLibrary,
-    functions_library: &'a FunctionsLibrary,
-    context_switcher: &'a dyn ContextSwitcher,
-}
-
-impl Display for Env<'_> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "\n  vars = {:?}\n  signals = {:?}\n  subcmps = {:?}",
-            self.vars, self.signals, self.subcmps
-        )
-    }
-}
-
-impl<'a> Env<'a> {
-    pub fn new(
-        templates_library: &'a TemplatesLibrary,
-        functions_library: &'a FunctionsLibrary,
-        context_switcher: &'a dyn ContextSwitcher,
-    ) -> Self {
-        Env {
-            vars: Default::default(),
-            signals: Default::default(),
-            subcmps: Default::default(),
-            templates_library,
-            functions_library,
-            context_switcher,
-        }
-    }
-
-    // READ OPERATIONS
-    pub fn get_var(&self, idx: usize) -> Value {
-        self.vars.get(&idx).unwrap_or_default().clone()
-    }
-
-    pub fn get_signal(&self, idx: usize) -> Value {
-        self.signals.get(&idx).unwrap_or_default().clone()
-    }
-
-    pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
-        self.subcmps[&subcmp_idx].get_signal(signal_idx)
-    }
-
-    pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
-        self.subcmps[&subcmp_idx].name
-    }
-
-    pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
-        self.subcmps[&subcmp_idx].template_id
-    }
-
-    pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
-        self.subcmps.get(&subcmp_idx).unwrap().counter_is_zero()
-    }
-
-    pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
-        self.subcmps.get(&subcmp_idx).unwrap().counter_equal_to(value)
-    }
-
-    // WRITE OPERATIONS
-    pub fn set_var(self, idx: usize, value: Value) -> Self {
-        let mut copy = self;
-        copy.vars.insert(idx, value);
-        copy
-    }
-
-    pub fn set_signal(self, idx: usize, value: Value) -> Self {
-        let mut copy = self;
-        copy.signals.insert(idx, value);
-        copy
-    }
-
-    /// Sets all the signals of the subcmp to UNK
-    pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
-        let mut copy = self;
-        let subcmp_env = copy
-            .subcmps
-            .remove(&subcmp_idx)
-            .expect(format!("Can't set a signal of subcomponent {}", subcmp_idx).as_str());
-        copy.subcmps.insert(subcmp_idx, subcmp_env.reset());
-        copy
-    }
-
-    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
-        //let subcmp = &self.subcmps[&subcmp_idx];
-        let mut copy = self;
-        let subcmp_env = copy
-            .subcmps
-            .remove(&subcmp_idx)
-            .expect(format!("Can't set a signal of subcomponent {}", subcmp_idx).as_str());
-        copy.subcmps.insert(subcmp_idx, subcmp_env.set_signal(signal_idx, value));
-        copy
-    }
-
-    pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
-        let mut copy = self;
-        let subcmp_env = copy
-            .subcmps
-            .remove(&subcmp_idx)
-            .expect(format!("Can't decrease counter of subcomponent {}", subcmp_idx).as_str());
-        copy.subcmps.insert(subcmp_idx, subcmp_env.decrease_counter());
-        copy
-    }
-
-    pub fn run_subcmp(
-        self,
-        _subcmp_idx: usize,
-        _name: &String,
-        _interpreter: &BucketInterpreter,
-        _observe: bool,
-    ) -> Self {
-        // The env returns Unknown by default to any index that does not have a value
-        // So we can fake executing a subcomponent and any read to the output
-        // of a subcomponent will return Unknown which is the only value that signals can have.
-        self
-    }
-
-    pub fn create_subcmp(
-        self,
-        name: &'a String,
-        base_index: usize,
-        count: usize,
-        template_id: usize,
-    ) -> Self {
-        let number_of_inputs = { self.templates_library[name].number_of_inputs };
-        let mut copy = self;
-        for i in base_index..(base_index + count) {
-            copy.subcmps.insert(i, SubcmpEnv::new(number_of_inputs, name, template_id));
-        }
-        copy
-    }
-
-    pub fn run_function(
-        &self,
-        name: &String,
-        interpreter: &BucketInterpreter,
-        args: Vec<Value>,
-        observe: bool,
-    ) -> Value {
-        if cfg!(debug_assertions) {
-            println!("Running function {}", name);
-        }
-        let code = &self.functions_library[name].body;
-        let mut function_env =
-            Env::new(self.templates_library, self.functions_library, self.context_switcher);
-        for (id, arg) in args.iter().enumerate() {
-            function_env = function_env.set_var(id, arg.clone());
-        }
-        let interpreter = self.context_switcher.switch(interpreter, name);
-        let r = interpreter.execute_instructions(
-            &code,
-            function_env,
-            !interpreter.observer.ignore_function_calls() && observe,
-        );
-        r.0.expect("Function must return a value!")
-    }
-
-    pub fn join(&self, other: &Self) -> Self {
-        Env {
-            vars: self.vars.join(&other.vars),
-            signals: self.signals.join(&other.signals),
-            subcmps: self.subcmps.join(&other.subcmps),
-            templates_library: self.templates_library,
-            functions_library: self.functions_library,
-            context_switcher: self.context_switcher,
-        }
-    }
-}
diff --git a/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
new file mode 100644
index 000000000..57685e7e4
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/env/extracted_func_env.rs
@@ -0,0 +1,301 @@
+use std::cell::Ref;
+use std::collections::{HashMap, BTreeMap};
+use std::fmt::{Display, Formatter, Result};
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use compiler::intermediate_representation::Instruction;
+use compiler::intermediate_representation::ir_interface::{AddressType, ValueBucket, ValueType};
+use crate::bucket_interpreter::BucketInterpreter;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::loop_unroll::body_extractor::ToOriginalLocation;
+use super::{Env, LibraryAccess};
+
+/// Environment used while interpreting functions that were created by extracting
+/// loop bodies into 'LOOP_BODY_FN_PREFIX' functions. In such functions some loads
+/// and stores were converted by ExtractedFunctionLocationUpdater into
+/// AddressType::SubcmpSignal references that indicate which function parameter
+/// holds the data; 'remap' translates them back to locations in the 'base' Env.
+#[derive(Clone)]
+pub struct ExtractedFuncEnvData<'a> {
+    base: Box<Env<'a>>,
+    remap: ToOriginalLocation,
+}
+
+impl Display for ExtractedFuncEnvData<'_> {
+    // Output is the wrapped Env's own rendering enclosed in "ExtractedFuncEnv{...}".
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        write!(f, "ExtractedFuncEnv{{").and_then(|_| self.base.fmt(f))?;
+        write!(f, "}}")
+    }
+}
+
+impl LibraryAccess for ExtractedFuncEnvData<'_> {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        self.base.get_function(name) // answered by the wrapped Env, no remapping
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        self.base.get_template(name) // answered by the wrapped Env, no remapping
+    }
+}
+
+// All subcomponent lookups need to use the map from loop unrolling to convert the
+//  AddressType::SubcmpSignal references created by ExtractedFunctionLocationUpdater
+//  back into the proper reference to access the correct Env entry.
+impl<'a> ExtractedFuncEnvData<'a> {
+    pub fn new(inner: Env<'a>, remap: ToOriginalLocation) -> Self {
+        Self { base: Box::new(inner), remap }
+    }
+
+    pub fn get_base(self) -> Env<'a> {
+        *self.base // consumes the wrapper: the inner Env is moved out of its Box
+    }
+
+    pub fn get_var(&self, idx: usize) -> Value {
+        // Local variables are not remapped: the read goes straight to the wrapped Env
+        self.base.get_var(idx)
+    }
+
+    pub fn get_signal(&self, idx: usize) -> Value {
+        // Signals are not remapped: the read goes straight to the wrapped Env
+        self.base.get_signal(idx)
+    }
+
+    /// Reads the value stored at the original location behind a remapped reference.
+    pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
+        // Translate the parameter reference back to the location it stands for.
+        let (loc, idx) = match self.remap.get(&subcmp_idx) {
+            Some(entry) => entry,
+            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+        };
+        //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+        //  the LocationRule that 'signal_idx' is computed from.
+        assert_eq!(signal_idx, 0);
+        match loc {
+            AddressType::Variable => self.base.get_var(*idx),
+            AddressType::Signal => self.base.get_signal(*idx),
+            AddressType::SubcmpSignal { cmp_address, .. } => {
+                let subcmp = match **cmp_address {
+                    Instruction::Value(ValueBucket {
+                        parse_as: ValueType::U32,
+                        value,
+                        ..
+                    }) => value,
+                    _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                };
+                self.base.get_subcmp_signal(subcmp, *idx)
+            }
+        }
+    }
+
+    /// Name of the subcomponent that the remapped parameter reference points at.
+    pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
+        let (loc, idx) = match self.remap.get(&subcmp_idx) {
+            Some(entry) => entry,
+            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+        };
+        match loc {
+            // NOTE(review): 'idx' is passed on as a subcomponent index here — confirm
+            AddressType::Variable | AddressType::Signal => self.base.get_subcmp_name(*idx),
+            AddressType::SubcmpSignal { cmp_address, .. } => {
+                let subcmp = match **cmp_address {
+                    Instruction::Value(ValueBucket {
+                        parse_as: ValueType::U32,
+                        value,
+                        ..
+                    }) => value,
+                    _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                };
+                //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                //  the LocationRule that 'signal_idx' is computed from.
+                assert_eq!(*idx, 0);
+                self.base.get_subcmp_name(subcmp)
+            }
+        }
+    }
+
+    /// Template id of the subcomponent that the remapped parameter reference points at.
+    pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
+        let (loc, idx) = match self.remap.get(&subcmp_idx) {
+            Some(entry) => entry,
+            None => todo!(), // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+        };
+        match loc {
+            // NOTE(review): 'idx' is passed on as a subcomponent index here — confirm
+            AddressType::Variable | AddressType::Signal => self.base.get_subcmp_template_id(*idx),
+            AddressType::SubcmpSignal { cmp_address, .. } => {
+                let subcmp = match **cmp_address {
+                    Instruction::Value(ValueBucket {
+                        parse_as: ValueType::U32,
+                        value,
+                        ..
+                    }) => value,
+                    _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                };
+                //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                //  the LocationRule that 'signal_idx' is computed from.
+                assert_eq!(*idx, 0);
+                self.base.get_subcmp_template_id(subcmp)
+            }
+        }
+    }
+
+    /// True when the counter of the subcomponent that the remapped parameter
+    /// reference points at is zero; always false for Variable/Signal locations.
+    pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
+        // Borrow the remap entry instead of cloning it: only the u32 index is read.
+        match self.remap.get(&subcmp_idx) {
+            //TODO: Is this None case being hit by a pre-existing subcmp at index 0 reference? I think so. Can I verify?
+            //  All subcmp refs in extracted body should have been replaced with refs to a subfix parameter... right?
+            //OBS: It happens because there will be Unknown counter when certain loop bodies are extracted to a function.
+            //  That means I do need to add the code to decrement counters inside the loop and let StoreBucket generate
+            //  the counter checks that will determine when to execute the "run" function at runtime.
+            None => todo!(), //false, // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            Some((loc, _)) => match loc {
+                AddressType::SubcmpSignal { cmp_address, .. } => {
+                    let subcmp = match **cmp_address {
+                        Instruction::Value(ValueBucket {
+                            parse_as: ValueType::U32,
+                            value,
+                            ..
+                        }) => value,
+                        _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                    };
+                    self.base.subcmp_counter_is_zero(subcmp)
+                }
+                _ => false, // no counter for Variable/Signal types
+            },
+        }
+    }
+
+    /// True when the counter of the subcomponent that the remapped parameter
+    /// reference points at equals 'value'; always false for Variable/Signal locations.
+    pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
+        // Borrow the remap entry instead of cloning it: only the u32 index is read.
+        match self.remap.get(&subcmp_idx) {
+            None => todo!(), //false, // from ArgIndex::SubCmp 'arena' and 'counter' parameters
+            Some((loc, _)) => match loc {
+                AddressType::SubcmpSignal { cmp_address, .. } => {
+                    let subcmp = match **cmp_address {
+                        Instruction::Value(ValueBucket {
+                            parse_as: ValueType::U32,
+                            value,
+                            ..
+                        }) => value,
+                        _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                    };
+                    self.base.subcmp_counter_equal_to(subcmp, value)
+                }
+                _ => false, // no counter for Variable/Signal types
+            },
+        }
+    }
+
+    pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
+        self.base.get_vars_clone() // variables are not remapped; the wrapped Env answers
+    }
+
+    pub fn get_vars_sort(&self) -> BTreeMap<usize, Value> {
+        self.base.get_vars_sort() // variables are not remapped; the wrapped Env answers
+    }
+
+    pub fn set_var(self, idx: usize, value: Value) -> Self {
+        // Variable writes are not remapped; the updated Env is rewrapped with the same 'remap'
+        Self::new(self.base.set_var(idx, value), self.remap)
+    }
+
+    pub fn set_signal(self, idx: usize, value: Value) -> Self {
+        // Signal writes are not remapped; the updated Env is rewrapped with the same 'remap'
+        Self::new(self.base.set_signal(idx, value), self.remap)
+    }
+
+    pub fn set_all_to_unk(self) -> Self {
+        // Forwarded to the wrapped Env as-is; the result is rewrapped with the same 'remap'
+        Self::new(self.base.set_all_to_unk(), self.remap)
+    }
+
+    pub fn set_subcmp_to_unk(self, _subcmp_idx: usize) -> Self {
+        unreachable!() // NOTE(review): presumably never requested inside an extracted body — confirm
+    }
+
+    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
+        //NOTE: This is only called by BucketInterpreter::store_value_in_address.
+        //Use the map from loop unrolling to convert the SubcmpSignal reference back
+        //  into the proper reference (reversing ExtractedFunctionLocationUpdater).
+        let new_env = match self.remap.get(&subcmp_idx).cloned() {
+            //NOTE: The ArgIndex::SubCmp 'arena' and 'counter' parameters were not added
+            //  to the 'remap' (producing None result here) because those parameters are
+            //  not actually used to access signals, just to call _run and update counter.
+            None => *self.base, // nothing is stored; the wrapped Env is passed through unchanged
+            Some((loc, idx)) => {
+                //ASSERT: ExtractedFunctionLocationUpdater will always assign 0 in
+                //  the LocationRule that 'signal_idx' is computed from.
+                assert_eq!(signal_idx, 0);
+                match loc {
+                    AddressType::Variable => self.base.set_var(idx, value),
+                    AddressType::Signal => self.base.set_signal(idx, value),
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match *cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
+                        self.base.set_subcmp_signal(subcmp, idx, value)
+                    }
+                }
+            }
+        };
+        ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap } // same 'remap', updated Env
+    }
+
+    pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
+        let new_env = match self.remap.get(&subcmp_idx).cloned() {
+            //NOTE: The ArgIndex::SubCmp 'arena' and 'counter' parameters were not added
+            //  to the 'remap' (producing None result here) because those parameters are
+            //  not actually used to access signals, just to call _run and update counter.
+            //  No counter update needed when SubcmpSignal is used for these special cases.
+            None => *self.base, // the wrapped Env is passed through unchanged
+            Some((loc, _)) => {
+                match loc {
+                    AddressType::SubcmpSignal { cmp_address, .. } => {
+                        let subcmp = match *cmp_address {
+                            Instruction::Value(ValueBucket {
+                                parse_as: ValueType::U32,
+                                value,
+                                ..
+                            }) => value,
+                            _ => unreachable!(), //ASSERT: 'cmp_address' was formed by 'loop_unroll::new_u32_value'
+                        };
+                        self.base.decrease_subcmp_counter(subcmp)
+                    }
+                    _ => *self.base, // no counter for Variable/Signal types
+                }
+            }
+        };
+        ExtractedFuncEnvData { base: Box::new(new_env), remap: self.remap } // same 'remap', updated Env
+    }
+
+    pub fn run_subcmp(
+        self,
+        _subcmp_idx: usize,
+        _name: &String,
+        _interpreter: &BucketInterpreter,
+        _observe: bool,
+    ) -> Self {
+        //Return self just like the StandardEnvData: every argument is ignored
+        self
+    }
+
+    pub fn create_subcmp(
+        self,
+        _name: &'a String,
+        _base_index: usize,
+        _count: usize,
+        _template_id: usize,
+    ) -> Self {
+        unreachable!() // NOTE(review): presumably subcomponents are never created in an extracted body — confirm
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/env/mod.rs b/circuit_passes/src/bucket_interpreter/env/mod.rs
new file mode 100644
index 000000000..30e729888
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/env/mod.rs
@@ -0,0 +1,288 @@
+use std::cell::Ref;
+use std::collections::{HashMap, BTreeMap};
+use std::fmt::{Display, Formatter, Result};
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use crate::bucket_interpreter::BucketInterpreter;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::loop_unroll::body_extractor::{LoopBodyExtractor, ToOriginalLocation};
+use self::extracted_func_env::ExtractedFuncEnvData;
+use self::standard_env::StandardEnvData;
+use self::unrolled_block_env::UnrolledBlockEnvData;
+
+mod standard_env;
+mod unrolled_block_env;
+mod extracted_func_env;
+
+pub trait LibraryAccess {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode>; // borrowed FunctionCode for 'name'
+    fn get_template(&self, name: &String) -> Ref<TemplateCode>; // borrowed TemplateCode for 'name'
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SubcmpEnv {
+    signals: HashMap<usize, Value>, // signal index -> value; a missing entry reads as the default Value
+    counter: usize, // starts at the 'inputs' given to new(); lowered by decrease_counter()
+    name: String, // owned copy of the name given to new()
+    template_id: usize,
+}
+
+impl SubcmpEnv {
+    pub fn new(inputs: usize, name: &String, template_id: usize) -> Self {
+        Self { signals: HashMap::new(), counter: inputs, name: name.to_owned(), template_id }
+    }
+
+    /// Forgets every stored signal value; counter, name and template id are kept.
+    pub fn reset(mut self) -> Self {
+        self.signals.clear();
+        self
+    }
+
+    pub fn get_signal(&self, index: usize) -> Value {
+        self.signals.get(&index).unwrap_or_default().clone()
+    }
+
+    /// Records 'value' for signal 'idx', replacing any previous value.
+    pub fn set_signal(mut self, idx: usize, value: Value) -> SubcmpEnv {
+        self.signals.insert(idx, value);
+        self
+    }
+
+    pub fn counter_is_zero(&self) -> bool {
+        self.counter_equal_to(0)
+    }
+
+    /// Lowers the counter by one.
+    pub fn decrease_counter(mut self) -> SubcmpEnv {
+        self.counter -= 1;
+        self
+    }
+
+    pub fn counter_equal_to(&self, value: usize) -> bool {
+        value == self.counter
+    }
+}
+
+// An immutable environment whose modification methods return a new object
+#[derive(Clone)]
+pub enum Env<'a> {
+    Standard(StandardEnvData<'a>), // built directly from a LibraryAccess
+    UnrolledBlock(UnrolledBlockEnvData<'a>), // built from an inner Env and a LoopBodyExtractor
+    ExtractedFunction(ExtractedFuncEnvData<'a>), // built from an inner Env and a ToOriginalLocation
+}
+
+impl Display for Env<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        match self {
+            Self::Standard(env) => env.fmt(f),
+            Self::UnrolledBlock(env) => env.fmt(f),
+            Self::ExtractedFunction(env) => env.fmt(f),
+        }
+    }
+}
+
+impl LibraryAccess for Env<'_> {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        match self {
+            Self::Standard(env) => env.get_function(name),
+            Self::UnrolledBlock(env) => env.get_function(name),
+            Self::ExtractedFunction(env) => env.get_function(name),
+        }
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        match self {
+            Self::Standard(env) => env.get_template(name),
+            Self::UnrolledBlock(env) => env.get_template(name),
+            Self::ExtractedFunction(env) => env.get_template(name),
+        }
+    }
+}
+
+impl<'a> Env<'a> {
+    pub fn new_standard_env(libs: &'a dyn LibraryAccess) -> Self {
+        Self::Standard(StandardEnvData::new(libs))
+    }
+
+    pub fn new_unroll_block_env(inner: Env<'a>, extractor: &'a LoopBodyExtractor) -> Self {
+        Self::UnrolledBlock(UnrolledBlockEnvData::new(inner, extractor))
+    }
+
+    pub fn new_extracted_func_env(inner: Env<'a>, remap: ToOriginalLocation) -> Self {
+        Self::ExtractedFunction(ExtractedFuncEnvData::new(inner, remap))
+    }
+
+    pub fn peel_extracted_func(self) -> Self {
+        if let Self::ExtractedFunction(wrapper) = self {
+            return wrapper.get_base();
+        }
+        self
+    }
+
+    // READ OPERATIONS
+    pub fn get_var(&self, idx: usize) -> Value {
+        match self {
+            Self::Standard(env) => env.get_var(idx),
+            Self::UnrolledBlock(env) => env.get_var(idx),
+            Self::ExtractedFunction(env) => env.get_var(idx),
+        }
+    }
+
+    pub fn get_signal(&self, idx: usize) -> Value {
+        match self {
+            Self::Standard(env) => env.get_signal(idx),
+            Self::UnrolledBlock(env) => env.get_signal(idx),
+            Self::ExtractedFunction(env) => env.get_signal(idx),
+        }
+    }
+
+    pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
+        match self {
+            Self::Standard(env) => env.get_subcmp_signal(subcmp_idx, signal_idx),
+            Self::UnrolledBlock(env) => env.get_subcmp_signal(subcmp_idx, signal_idx),
+            Self::ExtractedFunction(env) => env.get_subcmp_signal(subcmp_idx, signal_idx),
+        }
+    }
+
+    pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
+        match self {
+            Self::Standard(env) => env.get_subcmp_name(subcmp_idx),
+            Self::UnrolledBlock(env) => env.get_subcmp_name(subcmp_idx),
+            Self::ExtractedFunction(env) => env.get_subcmp_name(subcmp_idx),
+        }
+    }
+
+    pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
+        match self {
+            Env::Standard(d) => d.get_subcmp_template_id(subcmp_idx),
+            Env::UnrolledBlock(d) => d.get_subcmp_template_id(subcmp_idx),
+            Env::ExtractedFunction(d) => d.get_subcmp_template_id(subcmp_idx),
+        }
+    }
+
+    pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
+        match self {
+            Env::Standard(d) => d.subcmp_counter_is_zero(subcmp_idx),
+            Env::UnrolledBlock(d) => d.subcmp_counter_is_zero(subcmp_idx),
+            Env::ExtractedFunction(d) => d.subcmp_counter_is_zero(subcmp_idx),
+        }
+    }
+
+    pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
+        match self {
+            Env::Standard(d) => d.subcmp_counter_equal_to(subcmp_idx, value),
+            Env::UnrolledBlock(d) => d.subcmp_counter_equal_to(subcmp_idx, value),
+            Env::ExtractedFunction(d) => d.subcmp_counter_equal_to(subcmp_idx, value),
+        }
+    }
+
+    pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
+        match self {
+            Env::Standard(d) => d.get_vars_clone(),
+            Env::UnrolledBlock(d) => d.get_vars_clone(),
+            Env::ExtractedFunction(d) => d.get_vars_clone(),
+        }
+    }
+
+    pub fn get_vars_sort(&self) -> BTreeMap<usize, Value> {
+        match self {
+            Env::Standard(d) => d.get_vars_sort(),
+            Env::UnrolledBlock(d) => d.get_vars_sort(),
+            Env::ExtractedFunction(d) => d.get_vars_sort(),
+        }
+    }
+
+    // WRITE OPERATIONS
+    pub fn set_var(self, idx: usize, value: Value) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_var(idx, value)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_var(idx, value)),
+            Env::ExtractedFunction(d) => Env::ExtractedFunction(d.set_var(idx, value)),
+        }
+    }
+
+    pub fn set_signal(self, idx: usize, value: Value) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_signal(idx, value)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_signal(idx, value)),
+            Env::ExtractedFunction(d) => Env::ExtractedFunction(d.set_signal(idx, value)),
+        }
+    }
+
+    pub fn set_all_to_unk(self) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_all_to_unk()),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_all_to_unk()),
+            Env::ExtractedFunction(d) => Env::ExtractedFunction(d.set_all_to_unk()),
+        }
+    }
+
+    /// Sets all the signals of the subcmp to UNK
+    pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_subcmp_to_unk(subcmp_idx)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.set_subcmp_to_unk(subcmp_idx)),
+            Env::ExtractedFunction(d) => Env::ExtractedFunction(d.set_subcmp_to_unk(subcmp_idx)),
+        }
+    }
+
+    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.set_subcmp_signal(subcmp_idx, signal_idx, value)),
+            Env::UnrolledBlock(d) => {
+                Env::UnrolledBlock(d.set_subcmp_signal(subcmp_idx, signal_idx, value))
+            }
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.set_subcmp_signal(subcmp_idx, signal_idx, value))
+            }
+        }
+    }
+
+    pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.decrease_subcmp_counter(subcmp_idx)),
+            Env::UnrolledBlock(d) => Env::UnrolledBlock(d.decrease_subcmp_counter(subcmp_idx)),
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.decrease_subcmp_counter(subcmp_idx))
+            }
+        }
+    }
+
+    pub fn run_subcmp(
+        self,
+        subcmp_idx: usize,
+        name: &String,
+        interpreter: &BucketInterpreter,
+        observe: bool,
+    ) -> Self {
+        match self {
+            Env::Standard(d) => Env::Standard(d.run_subcmp(subcmp_idx, name, interpreter, observe)),
+            Env::UnrolledBlock(d) => {
+                Env::UnrolledBlock(d.run_subcmp(subcmp_idx, name, interpreter, observe))
+            }
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.run_subcmp(subcmp_idx, name, interpreter, observe))
+            }
+        }
+    }
+
+    pub fn create_subcmp(
+        self,
+        name: &'a String,
+        base_index: usize,
+        count: usize,
+        template_id: usize,
+    ) -> Self {
+        match self {
+            Env::Standard(d) => {
+                Env::Standard(d.create_subcmp(name, base_index, count, template_id))
+            }
+            Env::UnrolledBlock(d) => {
+                Env::UnrolledBlock(d.create_subcmp(name, base_index, count, template_id))
+            }
+            Env::ExtractedFunction(d) => {
+                Env::ExtractedFunction(d.create_subcmp(name, base_index, count, template_id))
+            }
+        }
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/env/standard_env.rs b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
new file mode 100644
index 000000000..ab211aa02
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/env/standard_env.rs
@@ -0,0 +1,173 @@
+use std::cell::Ref;
+use std::collections::{HashMap, BTreeMap};
+use std::fmt::{Display, Formatter, Result};
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use crate::bucket_interpreter::BucketInterpreter;
+use crate::bucket_interpreter::value::Value;
+use super::{SubcmpEnv, LibraryAccess};
+
+#[derive(Clone)]
+pub struct StandardEnvData<'a> {
+    vars: HashMap<usize, Value>, // values keyed by variable index
+    signals: HashMap<usize, Value>, // values keyed by signal index
+    subcmps: HashMap<usize, SubcmpEnv>, // per-subcomponent state keyed by subcomponent index
+    libs: &'a dyn LibraryAccess, // lookup used for template/function definitions
+}
+
+impl Display for StandardEnvData<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        let StandardEnvData { vars, signals, subcmps, .. } = self;
+        f.write_fmt(format_args!(
+            "StandardEnv{{\n  vars = {:?}\n  signals = {:?}\n  subcmps = {:?}}}",
+            vars, signals, subcmps
+        ))
+    }
+}
+
+impl LibraryAccess for StandardEnvData<'_> {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        LibraryAccess::get_function(self.libs, name)
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        LibraryAccess::get_template(self.libs, name)
+    }
+}
+
+impl<'a> StandardEnvData<'a> {
+    /// Creates an env with no vars, signals or subcomponents that resolves code through `libs`.
+    pub fn new(libs: &'a dyn LibraryAccess) -> Self {
+        StandardEnvData {
+            vars: Default::default(),
+            signals: Default::default(),
+            subcmps: Default::default(),
+            libs,
+        }
+    }
+
+    // READ OPERATIONS
+    /// Returns a clone of variable `idx`, or of the default `Value` when nothing is stored.
+    pub fn get_var(&self, idx: usize) -> Value {
+        self.vars.get(&idx).unwrap_or_default().clone()
+    }
+
+    /// Returns a clone of signal `idx`, or of the default `Value` when nothing is stored.
+    pub fn get_signal(&self, idx: usize) -> Value {
+        self.signals.get(&idx).unwrap_or_default().clone()
+    }
+
+    /// Reads signal `signal_idx` of subcomponent `subcmp_idx`; panics if it was never created.
+    pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
+        self.subcmps[&subcmp_idx].get_signal(signal_idx)
+    }
+
+    /// Returns the `name` stored for subcomponent `subcmp_idx`; panics if it was never created.
+    pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
+        &self.subcmps[&subcmp_idx].name
+    }
+
+    /// Returns the `template_id` of subcomponent `subcmp_idx`; panics if it was never created.
+    pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
+        self.subcmps[&subcmp_idx].template_id
+    }
+
+    /// Returns `counter_is_zero()` of subcomponent `subcmp_idx`; panics if it was never created.
+    pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
+        self.subcmps[&subcmp_idx].counter_is_zero()
+    }
+
+    /// Returns `counter_equal_to(value)` of subcomponent `subcmp_idx`; panics if it is missing.
+    pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
+        self.subcmps[&subcmp_idx].counter_equal_to(value)
+    }
+
+    /// Returns a copy of the complete variable map.
+    pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
+        self.vars.clone()
+    }
+
+    /// Returns a copy of the variables in a `BTreeMap`, i.e. ordered by variable index.
+    pub fn get_vars_sort(&self) -> BTreeMap<usize, Value> {
+        self.vars.iter().map(|(idx, value)| (*idx, value.clone())).collect()
+    }
+
+    // WRITE OPERATIONS
+    /// Stores `value` for variable `idx`, replacing any previous value.
+    pub fn set_var(mut self, idx: usize, value: Value) -> Self {
+        self.vars.insert(idx, value);
+        self
+    }
+
+    /// Stores `value` for signal `idx`, replacing any previous value.
+    pub fn set_signal(mut self, idx: usize, value: Value) -> Self {
+        self.signals.insert(idx, value);
+        self
+    }
+
+    /// Sets every stored var and signal to `Value::Unknown` and clears all subcomponent signals.
+    pub fn set_all_to_unk(mut self) -> Self {
+        self.vars.values_mut().for_each(|v| *v = Value::Unknown);
+        self.signals.values_mut().for_each(|v| *v = Value::Unknown);
+        self.subcmps.values_mut().for_each(|subcmp| subcmp.signals.clear());
+        self
+    }
+
+    /// Sets all the signals of the subcmp to UNK; panics if the subcomponent was never created.
+    pub fn set_subcmp_to_unk(mut self, subcmp_idx: usize) -> Self {
+        let subcmp_env = self
+            .subcmps
+            .remove(&subcmp_idx)
+            .unwrap_or_else(|| panic!("Can't set a signal of subcomponent {}", subcmp_idx));
+        self.subcmps.insert(subcmp_idx, subcmp_env.reset());
+        self
+    }
+
+    /// Sets signal `signal_idx` of subcomponent `subcmp_idx`; panics if it was never created.
+    pub fn set_subcmp_signal(mut self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
+        let subcmp_env = self
+            .subcmps
+            .remove(&subcmp_idx)
+            .unwrap_or_else(|| panic!("Can't set a signal of subcomponent {}", subcmp_idx));
+        self.subcmps.insert(subcmp_idx, subcmp_env.set_signal(signal_idx, value));
+        self
+    }
+
+    /// Applies `decrease_counter()` to subcomponent `subcmp_idx`; panics if it was never created.
+    pub fn decrease_subcmp_counter(mut self, subcmp_idx: usize) -> Self {
+        let subcmp_env = self
+            .subcmps
+            .remove(&subcmp_idx)
+            .unwrap_or_else(|| panic!("Can't decrease counter of subcomponent {}", subcmp_idx));
+        self.subcmps.insert(subcmp_idx, subcmp_env.decrease_counter());
+        self
+    }
+
+    pub fn run_subcmp(
+        self,
+        _subcmp_idx: usize,
+        _name: &String,
+        _interpreter: &BucketInterpreter,
+        _observe: bool,
+    ) -> Self {
+        // The env returns Unknown by default to any index that does not have a value
+        // So we can fake executing a subcomponent and any read to the output
+        // of a subcomponent will return Unknown which is the only value that signals can have.
+        self
+    }
+
+    /// Registers subcomponents `base_index..base_index + count` as instances of template `name`.
+    pub fn create_subcmp(
+        mut self,
+        name: &String,
+        base_index: usize,
+        count: usize,
+        template_id: usize,
+    ) -> Self {
+        let number_of_inputs = self.get_template(name).number_of_inputs;
+        for i in base_index..(base_index + count) {
+            self.subcmps.insert(i, SubcmpEnv::new(number_of_inputs, name, template_id));
+        }
+        self
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
new file mode 100644
index 000000000..9b4b3dae7
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/env/unrolled_block_env.rs
@@ -0,0 +1,156 @@
+use std::cell::Ref;
+use std::collections::{HashMap, BTreeMap};
+use std::fmt::{Display, Formatter, Result};
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use crate::bucket_interpreter::BucketInterpreter;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::LOOP_BODY_FN_PREFIX;
+use crate::passes::loop_unroll::body_extractor::LoopBodyExtractor;
+use super::{Env, LibraryAccess};
+
+/// This Env is used by the loop unroller to process the BlockBucket containing an
+/// unrolled loop, specifically handling the case where the LibraryAccess does not
+/// contain the functions generated to hold the extracted loop bodies. It instead
+/// uses the temporary list in the LoopBodyExtractor to get those function bodies.
+#[derive(Clone)]
+pub struct UnrolledBlockEnvData<'a> {
+    base: Box<Env<'a>>, // wrapped env; every state read/write is forwarded to it
+    extractor: &'a LoopBodyExtractor, // source of functions named with LOOP_BODY_FN_PREFIX
+}
+
+impl Display for UnrolledBlockEnvData<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        write!(f, "UnrolledBlockEnv{{")
+            .and_then(|_| Display::fmt(&*self.base, f))
+            .and_then(|_| write!(f, "}}"))
+    }
+}
+
+impl LibraryAccess for UnrolledBlockEnvData<'_> {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        if !name.starts_with(LOOP_BODY_FN_PREFIX) {
+            return self.base.get_function(name);
+        }
+        Ref::map(self.extractor.get_new_functions(), |candidates| {
+            candidates
+                .iter()
+                .find(|func| func.name.eq(name))
+                .expect("Cannot find extracted function definition!")
+        })
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        self.base.get_template(name)
+    }
+}
+
+impl<'a> UnrolledBlockEnvData<'a> {
+    /// Wraps `base` (boxed) together with the `extractor` that `get_function` consults
+    /// for names starting with `LOOP_BODY_FN_PREFIX`.
+    pub fn new(base: Env<'a>, extractor: &'a LoopBodyExtractor) -> Self {
+        UnrolledBlockEnvData { base: Box::new(base), extractor }
+    }
+
+    // READ OPERATIONS: forwarded unchanged to the wrapped env
+    /// Forwards `get_var` to the wrapped env.
+    pub fn get_var(&self, idx: usize) -> Value {
+        self.base.get_var(idx)
+    }
+
+    /// Forwards `get_signal` to the wrapped env.
+    pub fn get_signal(&self, idx: usize) -> Value {
+        self.base.get_signal(idx)
+    }
+
+    /// Forwards `get_subcmp_signal` to the wrapped env.
+    pub fn get_subcmp_signal(&self, subcmp_idx: usize, signal_idx: usize) -> Value {
+        self.base.get_subcmp_signal(subcmp_idx, signal_idx)
+    }
+
+    /// Forwards `get_subcmp_name` to the wrapped env.
+    pub fn get_subcmp_name(&self, subcmp_idx: usize) -> &String {
+        self.base.get_subcmp_name(subcmp_idx)
+    }
+
+    /// Forwards `get_subcmp_template_id` to the wrapped env.
+    pub fn get_subcmp_template_id(&self, subcmp_idx: usize) -> usize {
+        self.base.get_subcmp_template_id(subcmp_idx)
+    }
+
+    /// Forwards `subcmp_counter_is_zero` to the wrapped env.
+    pub fn subcmp_counter_is_zero(&self, subcmp_idx: usize) -> bool {
+        self.base.subcmp_counter_is_zero(subcmp_idx)
+    }
+
+    /// Forwards `subcmp_counter_equal_to` to the wrapped env.
+    pub fn subcmp_counter_equal_to(&self, subcmp_idx: usize, value: usize) -> bool {
+        self.base.subcmp_counter_equal_to(subcmp_idx, value)
+    }
+
+    /// Forwards `get_vars_clone` to the wrapped env.
+    pub fn get_vars_clone(&self) -> HashMap<usize, Value> {
+        self.base.get_vars_clone()
+    }
+
+    /// Forwards `get_vars_sort` to the wrapped env.
+    pub fn get_vars_sort(&self) -> BTreeMap<usize, Value> {
+        self.base.get_vars_sort()
+    }
+
+    // WRITE OPERATIONS: the wrapped env is consumed, updated by the same-named call, and then
+    // boxed again through `new` together with the unchanged extractor reference.
+    /// Applies `set_var` to the wrapped env.
+    pub fn set_var(self, idx: usize, value: Value) -> Self {
+        Self::new(self.base.set_var(idx, value), self.extractor)
+    }
+
+    /// Applies `set_signal` to the wrapped env.
+    pub fn set_signal(self, idx: usize, value: Value) -> Self {
+        Self::new(self.base.set_signal(idx, value), self.extractor)
+    }
+
+    /// Applies `set_all_to_unk` to the wrapped env.
+    pub fn set_all_to_unk(self) -> Self {
+        Self::new(self.base.set_all_to_unk(), self.extractor)
+    }
+
+    /// Applies `set_subcmp_to_unk` to the wrapped env.
+    pub fn set_subcmp_to_unk(self, subcmp_idx: usize) -> Self {
+        Self::new(self.base.set_subcmp_to_unk(subcmp_idx), self.extractor)
+    }
+
+    /// Applies `set_subcmp_signal` to the wrapped env.
+    pub fn set_subcmp_signal(self, subcmp_idx: usize, signal_idx: usize, value: Value) -> Self {
+        Self::new(self.base.set_subcmp_signal(subcmp_idx, signal_idx, value), self.extractor)
+    }
+
+    /// Applies `decrease_subcmp_counter` to the wrapped env.
+    pub fn decrease_subcmp_counter(self, subcmp_idx: usize) -> Self {
+        Self::new(self.base.decrease_subcmp_counter(subcmp_idx), self.extractor)
+    }
+
+    /// Applies `run_subcmp` to the wrapped env with the same arguments and re-wraps
+    /// the env it returns.
+    pub fn run_subcmp(
+        self,
+        subcmp_idx: usize,
+        name: &String,
+        interpreter: &BucketInterpreter,
+        observe: bool,
+    ) -> Self {
+        Self::new(self.base.run_subcmp(subcmp_idx, name, interpreter, observe), self.extractor)
+    }
+
+    /// Applies `create_subcmp` to the wrapped env with the same arguments and re-wraps
+    /// the env it returns.
+    pub fn create_subcmp(
+        self,
+        name: &'a String,
+        base_index: usize,
+        count: usize,
+        template_id: usize,
+    ) -> Self {
+        Self::new(self.base.create_subcmp(name, base_index, count, template_id), self.extractor)
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/memory.rs b/circuit_passes/src/bucket_interpreter/memory.rs
new file mode 100644
index 000000000..368474491
--- /dev/null
+++ b/circuit_passes/src/bucket_interpreter/memory.rs
@@ -0,0 +1,164 @@
+use std::cell::{RefCell, Ref};
+use std::collections::HashMap;
+use std::ops::Range;
+use code_producers::components::{TemplateInstanceIOMap, IODef};
+use code_producers::llvm_elements::IndexMapping;
+use compiler::circuit_design::function::FunctionCode;
+use compiler::circuit_design::template::TemplateCode;
+use compiler::compiler_interface::Circuit;
+use crate::bucket_interpreter::BucketInterpreter;
+use crate::bucket_interpreter::env::{Env, LibraryAccess};
+use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::passes::GlobalPassData;
+
+pub struct PassMemory {
+    // Every mutable field is wrapped in its own RefCell: the methods below only take `&self`
+    //  but still need to update the contents, and separate cells let one field be replaced
+    //  while another one is borrowed. `prime` is only ever read, so it is stored directly.
+    templates_library: RefCell<HashMap<String, TemplateCode>>, // template header -> template
+    functions_library: RefCell<HashMap<String, FunctionCode>>, // function header -> function
+    constant_fields: RefCell<Vec<String>>, // field constants, addressed by position
+    current_scope: RefCell<String>, // scope used by the `get_current_scope_*` lookups
+    io_map: RefCell<TemplateInstanceIOMap>,
+    signal_index_mapping: RefCell<HashMap<String, IndexMapping>>, // keyed by scope
+    variables_index_mapping: RefCell<HashMap<String, IndexMapping>>, // keyed by scope
+    component_addr_index_mapping: RefCell<HashMap<String, IndexMapping>>, // keyed by scope
+    prime: String,
+}
+
+impl PassMemory {
+    /// Creates a memory whose libraries, constants and index mappings all start out empty.
+    pub fn new(prime: String, current_scope: String, io_map: TemplateInstanceIOMap) -> Self {
+        PassMemory {
+            templates_library: Default::default(),
+            functions_library: Default::default(),
+            constant_fields: Default::default(),
+            current_scope: RefCell::new(current_scope),
+            io_map: RefCell::new(io_map),
+            signal_index_mapping: Default::default(),
+            variables_index_mapping: Default::default(),
+            component_addr_index_mapping: Default::default(),
+            prime,
+        }
+    }
+
+    /// Builds an interpreter over this memory, scoped to a copy of the current scope.
+    pub fn build_interpreter<'a, 'd: 'a>(
+        &'a self,
+        global_data: &'d RefCell<GlobalPassData>,
+        observer: &'a dyn InterpreterObserver,
+    ) -> BucketInterpreter {
+        let scope = self.current_scope.borrow().to_string();
+        self.build_interpreter_with_scope(global_data, observer, scope)
+    }
+
+    /// Builds an interpreter over this memory using the explicitly supplied `scope`.
+    pub fn build_interpreter_with_scope<'a, 'd: 'a>(
+        &'a self,
+        global_data: &'d RefCell<GlobalPassData>,
+        observer: &'a dyn InterpreterObserver,
+        scope: String,
+    ) -> BucketInterpreter {
+        BucketInterpreter::init(global_data, observer, self, scope)
+    }
+
+    /// Makes the header of `template` the current scope.
+    pub fn set_scope(&self, template: &TemplateCode) {
+        *self.current_scope.borrow_mut() = template.header.clone();
+    }
+
+    /// Interprets `template.body` in a fresh standard env; the current scope must be non-empty.
+    pub fn run_template<'d>(
+        &self,
+        global_data: &'d RefCell<GlobalPassData>,
+        observer: &dyn InterpreterObserver,
+        template: &TemplateCode,
+    ) {
+        assert!(!self.current_scope.borrow().is_empty());
+        if cfg!(debug_assertions) {
+            println!("Running template {}", self.current_scope.borrow());
+        }
+        let interpreter = self.build_interpreter(global_data, observer);
+        let start_env = Env::new_standard_env(self);
+        interpreter.execute_instructions(&template.body, start_env, true);
+    }
+
+    /// Stores a clone of `template` in the template library under its header.
+    pub fn add_template(&self, template: &TemplateCode) {
+        self.templates_library.borrow_mut().insert(template.header.clone(), template.clone());
+    }
+
+    /// Stores a clone of `function` in the function library under its header.
+    pub fn add_function(&self, function: &FunctionCode) {
+        self.functions_library.borrow_mut().insert(function.header.clone(), function.clone());
+    }
+
+    /// Loads every template and function of `circuit` and replaces all data taken from `llvm_data`.
+    pub fn fill_from_circuit(&self, circuit: &Circuit) {
+        circuit.templates.iter().for_each(|template| self.add_template(template));
+        circuit.functions.iter().for_each(|function| self.add_function(function));
+        let llvm = &circuit.llvm_data;
+        self.constant_fields.replace(llvm.field_tracking.clone());
+        self.io_map.replace(llvm.io_map.clone());
+        self.variables_index_mapping.replace(llvm.variable_index_mapping.clone());
+        self.signal_index_mapping.replace(llvm.signal_index_mapping.clone());
+        self.component_addr_index_mapping.replace(llvm.component_index_mapping.clone());
+    }
+
+    pub fn get_prime(&self) -> &String {
+        &self.prime
+    }
+
+    pub fn get_field_constant(&self, index: usize) -> String {
+        self.constant_fields.borrow()[index].clone()
+    }
+
+    pub fn get_field_constants_clone(&self) -> Vec<String> {
+        self.constant_fields.borrow().clone()
+    }
+
+    /// Appends `new_value` to the stored constants and returns the position it was stored at
+    pub fn add_field_constant(&self, new_value: String) -> usize {
+        let mut constants = self.constant_fields.borrow_mut();
+        constants.push(new_value);
+        constants.len() - 1
+    }
+
+    pub fn get_iodef(&self, template_id: &usize, signal_code: &usize) -> IODef {
+        self.io_map.borrow()[template_id][*signal_code].clone()
+    }
+
+    pub fn get_signal_index_mapping(&self, scope: &String, index: &usize) -> Range<usize> {
+        self.signal_index_mapping.borrow()[scope][index].clone()
+    }
+
+    pub fn get_current_scope_signal_index_mapping(&self, index: &usize) -> Range<usize> {
+        self.get_signal_index_mapping(&self.current_scope.borrow(), index)
+    }
+
+    pub fn get_variables_index_mapping(&self, scope: &String, index: &usize) -> Range<usize> {
+        self.variables_index_mapping.borrow()[scope][index].clone()
+    }
+
+    pub fn get_current_scope_variables_index_mapping(&self, index: &usize) -> Range<usize> {
+        self.get_variables_index_mapping(&self.current_scope.borrow(), index)
+    }
+
+    pub fn get_component_addr_index_mapping(&self, scope: &String, index: &usize) -> Range<usize> {
+        self.component_addr_index_mapping.borrow()[scope][index].clone()
+    }
+
+    pub fn get_current_scope_component_addr_index_mapping(&self, index: &usize) -> Range<usize> {
+        self.get_component_addr_index_mapping(&self.current_scope.borrow(), index)
+    }
+}
+
+impl LibraryAccess for PassMemory {
+    fn get_function(&self, name: &String) -> Ref<FunctionCode> {
+        Ref::map(self.functions_library.borrow(), |functions| &functions[name])
+    }
+
+    fn get_template(&self, name: &String) -> Ref<TemplateCode> {
+        Ref::map(self.templates_library.borrow(), |templates| &templates[name])
+    }
+}
diff --git a/circuit_passes/src/bucket_interpreter/mod.rs b/circuit_passes/src/bucket_interpreter/mod.rs
index c5ff9627f..afe8d1f41 100644
--- a/circuit_passes/src/bucket_interpreter/mod.rs
+++ b/circuit_passes/src/bucket_interpreter/mod.rs
@@ -1,76 +1,52 @@
 pub mod value;
 pub mod env;
+pub mod memory;
 pub mod observer;
 pub(crate) mod operations;
 
+use std::cell::RefCell;
+use std::vec;
 use circom_algebra::modular_arithmetic;
-use code_producers::components::TemplateInstanceIOMap;
-use code_producers::llvm_elements::IndexMapping;
+use code_producers::llvm_elements::fr::{FR_IDENTITY_ARR_PTR, FR_INDEX_ARR_PTR};
 use compiler::intermediate_representation::{Instruction, InstructionList, InstructionPointer};
 use compiler::intermediate_representation::ir_interface::*;
 use compiler::num_bigint::BigInt;
 use observer::InterpreterObserver;
 use program_structure::constants::UsefulConstants;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::operations::compute_offset;
-use crate::bucket_interpreter::value::{JoinSemiLattice, Value};
-use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
-
-pub struct BucketInterpreter<'a> {
-    _scope: &'a String,
-    _prime: &'a String,
-    pub constant_fields: &'a Vec<String>,
-    pub(crate) observer: &'a dyn InterpreterObserver,
-    io_map: &'a TemplateInstanceIOMap,
+use crate::bucket_interpreter::value::Value::{self, KnownBigInt, KnownU32, Unknown};
+use crate::passes::{LOOP_BODY_FN_PREFIX, GlobalPassData};
+use self::env::LibraryAccess;
+
+pub struct BucketInterpreter<'a, 'd> {
+    global_data: &'d RefCell<GlobalPassData>,
+    observer: &'a dyn InterpreterObserver,
+    mem: &'a PassMemory,
+    scope: String,
     p: BigInt,
-    signal_index_mapping: &'a IndexMapping,
-    variables_index_mapping: &'a IndexMapping,
-    component_addr_index_mapping: &'a IndexMapping,
 }
 
 pub type R<'a> = (Option<Value>, Env<'a>);
 
-impl JoinSemiLattice for Option<Value> {
-    fn join(&self, other: &Self) -> Self {
-        match (self, other) {
-            (x, None) => x.clone(),
-            (None, x) => x.clone(),
-            (Some(x), Some(y)) => Some(x.join(y)),
-        }
-    }
-}
-
-impl JoinSemiLattice for R<'_> {
-    fn join(&self, other: &Self) -> Self {
-        (self.0.join(&other.0), self.1.join(&other.1))
-    }
-}
-
-impl<'a> BucketInterpreter<'a> {
+impl<'a: 'd, 'd> BucketInterpreter<'a, 'd> {
     pub fn init(
-        scope: &'a String,
-        prime: &'a String,
-        constant_fields: &'a Vec<String>,
+        global_data: &'d RefCell<GlobalPassData>,
         observer: &'a dyn InterpreterObserver,
-        io_map: &'a TemplateInstanceIOMap,
-        signal_index_mapping: &'a IndexMapping,
-        variables_index_mapping: &'a IndexMapping,
-        component_addr_index_mapping: &'a IndexMapping,
+        mem: &'a PassMemory,
+        scope: String,
     ) -> Self {
         BucketInterpreter {
-            _scope: scope,
-            _prime: prime,
-            constant_fields,
+            global_data,
             observer,
-            io_map,
-            p: UsefulConstants::new(prime).get_p().clone(),
-            signal_index_mapping,
-            variables_index_mapping,
-            component_addr_index_mapping,
+            mem,
+            scope,
+            p: UsefulConstants::new(mem.get_prime()).get_p().clone(),
         }
     }
 
-    fn get_id_from_indexed_location(&self, location: &LocationRule, env: &Env) -> usize {
+    pub fn get_index_from_location(&self, location: &LocationRule, env: &Env) -> usize {
         match location {
             LocationRule::Indexed { location, .. } => {
                 let (idx, _) = self.execute_instruction(location, env.clone(), false);
@@ -90,34 +66,20 @@ impl<'a> BucketInterpreter<'a> {
     ) {
         match bucket.dest_address_type {
             AddressType::Variable => {
-                let idx = self.get_id_from_indexed_location(&bucket.dest, env);
-                let indices = self
-                    .variables_index_mapping
-                    .get(&idx)
-                    .expect(
-                        format!(
-                            "Could not get idx {idx} from mapping. Min key {:?}. Max key {:?}",
-                            self.variables_index_mapping.keys().min(),
-                            self.variables_index_mapping.keys().max()
-                        )
-                        .as_str(),
-                    )
-                    .clone();
-                for index in indices {
+                let idx = self.get_index_from_location(&bucket.dest, env);
+                for index in self.mem.get_variables_index_mapping(&self.scope, &idx) {
                     vars.push(index);
                 }
             }
             AddressType::Signal => {
-                let idx = self.get_id_from_indexed_location(&bucket.dest, env);
-                let indices = self.signal_index_mapping[&idx].clone();
-                for index in indices {
+                let idx = self.get_index_from_location(&bucket.dest, env);
+                for index in self.mem.get_signal_index_mapping(&self.scope, &idx) {
                     signals.push(index);
                 }
             }
             AddressType::SubcmpSignal { .. } => {
-                let idx = self.get_id_from_indexed_location(&bucket.dest, env);
-                let indices = self.component_addr_index_mapping[&idx].clone();
-                for index in indices {
+                let idx = self.get_index_from_location(&bucket.dest, env);
+                for index in self.mem.get_component_addr_index_mapping(&self.scope, &idx) {
                     subcmps.push(index);
                 }
             }
@@ -219,14 +181,14 @@ impl<'a> BucketInterpreter<'a> {
     ) -> R<'env> {
         (
             Some(match bucket.parse_as {
+                ValueType::U32 => KnownU32(bucket.value),
                 ValueType::BigInt => {
-                    let constant = &self.constant_fields[bucket.value];
+                    let constant = self.mem.get_field_constant(bucket.value);
                     KnownBigInt(
                         BigInt::parse_bytes(constant.as_bytes(), 10)
                             .expect(format!("Cannot parse constant {}", constant).as_str()),
                     )
                 }
-                ValueType::U32 => KnownU32(bucket.value),
             }),
             env,
         )
@@ -273,11 +235,8 @@ impl<'a> BucketInterpreter<'a> {
             }
             AddressType::SubcmpSignal { cmp_address, .. } => {
                 let (addr, env) = self.execute_instruction(cmp_address, env, observe);
-                let addr = addr
-                    .expect(
-                        "cmp_address instruction in StoreBucket SubcmpSignal must produce a value!",
-                    )
-                    .get_u32();
+                let addr =
+                    addr.expect("cmp_address in SubcmpSignal must produce a value!").get_u32();
                 let continue_observing =
                     if observe { self.observer.on_location_rule(&bucket.src, &env) } else { false };
                 let (idx, env) = match &bucket.src {
@@ -289,7 +248,7 @@ impl<'a> BucketInterpreter<'a> {
                     LocationRule::Mapped { signal_code, indexes } => {
                         let mut acc_env = env;
                         let io_def =
-                            &self.io_map[&acc_env.get_subcmp_template_id(addr)][*signal_code];
+                            self.mem.get_iodef(&acc_env.get_subcmp_template_id(addr), signal_code);
                         let map_access = io_def.offset;
                         if indexes.len() > 0 {
                             let mut indexes_values = vec![];
@@ -381,7 +340,7 @@ impl<'a> BucketInterpreter<'a> {
                         let mut acc_env = env;
                         let name = Some(acc_env.get_subcmp_name(addr).clone());
                         let io_def =
-                            &self.io_map[&acc_env.get_subcmp_template_id(addr)][*signal_code];
+                            self.mem.get_iodef(&acc_env.get_subcmp_template_id(addr), signal_code);
                         let map_access = io_def.offset;
                         if indexes.len() > 0 {
                             let mut indexes_values = vec![];
@@ -456,39 +415,90 @@ impl<'a> BucketInterpreter<'a> {
         (computed_value, env)
     }
 
+    fn run_function_loopbody<'env>(&self, name: &String, env: Env<'env>, observe: bool) -> R<'env> {
+        if cfg!(debug_assertions) {
+            println!("Running function {}", name);
+        };
+        let mut res: R<'env> = (
+            None,
+            Env::new_extracted_func_env(
+                env.clone(),
+                self.global_data.borrow().extract_func_orig_loc[name][&env.get_vars_sort()].clone(),
+            ),
+        );
+        //NOTE: Do not change scope for the new interpreter because the mem lookups within
+        //  `get_write_operations_in_store_bucket` need to use the original function context.
+        let interp = self.mem.build_interpreter(self.global_data, self.observer);
+        let observe = observe && !interp.observer.ignore_function_calls();
+        let instructions = &env.get_function(name).body;
+        unsafe {
+            let ptr = instructions.as_ptr();
+            for i in 0..instructions.len() {
+                let inst = ptr.add(i).as_ref().unwrap();
+                res = interp.execute_instruction(inst, res.1, observe);
+            }
+        }
+        //Remove the Env::ExtractedFunction wrapper
+        (res.0, res.1.peel_extracted_func())
+    }
+
+    fn run_function_basic(&self, name: &String, args: Vec<Value>, observe: bool) -> Value {
+        if cfg!(debug_assertions) {
+            println!("Running function {}", name);
+        }
+        let mut new_env = Env::new_standard_env(self.mem);
+        for (id, arg) in args.into_iter().enumerate() {
+            new_env = new_env.set_var(id, arg); // `args` is owned: move each value, no clone
+        }
+        let interp =
+            self.mem.build_interpreter_with_scope(self.global_data, self.observer, name.clone());
+        let (v, _) = interp.execute_instructions(
+            &self.mem.get_function(name).body,
+            new_env,
+            observe && !interp.observer.ignore_function_calls(),
+        );
+        v.expect("Function must produce a value!")
+    }
+
     pub fn execute_call_bucket<'env>(
         &self,
         bucket: &'env CallBucket,
         env: Env<'env>,
         observe: bool,
     ) -> R<'env> {
-        let mut args = vec![];
         let mut env = env;
-        for i in &bucket.arguments {
-            let (value, new_env) = self.execute_instruction(i, env, observe);
-            env = new_env;
-            args.push(value.expect("Function argument must produce a value!"));
-        }
-
-        let any_unknown = args.iter().any(|v| v.is_unknown());
-
-        //let result = env.run_function(&bucket.symbol, self, args, observe);
-        let result = if any_unknown {
-            Unknown
+        let res = if bucket.symbol.eq(FR_IDENTITY_ARR_PTR) || bucket.symbol.eq(FR_INDEX_ARR_PTR) {
+            (Some(Unknown), env)
+        } else if bucket.symbol.starts_with(LOOP_BODY_FN_PREFIX) {
+            // The extracted loop body functions can change any values in the environment
+            //  via the parameters passed to it. So interpret the function and keep the
+            //  resulting Env (as if the function had executed inline).
+            self.run_function_loopbody(&bucket.symbol, env, observe)
         } else {
-            env.run_function(&bucket.symbol, self, args, observe)
+            let mut args = vec![];
+            for i in &bucket.arguments {
+                let (value, new_env) = self.execute_instruction(i, env, observe);
+                env = new_env;
+                args.push(value.expect("Function argument must produce a value!"));
+            }
+            let v = if args.iter().any(|v| v.is_unknown()) {
+                Unknown
+            } else {
+                self.run_function_basic(&bucket.symbol, args, observe)
+            };
+            (Some(v), env)
         };
 
-        // Write the result in the destination according to the address type
+        // Write the result in the destination according to the ReturnType
         match &bucket.return_info {
-            ReturnType::Intermediate { .. } => (Some(result), env),
+            ReturnType::Intermediate { .. } => res,
             ReturnType::Final(final_data) => (
                 None,
                 self.store_value_in_address(
                     &final_data.dest_address_type,
                     &final_data.dest,
-                    result,
-                    env,
+                    res.0.expect("Function must return a value!"),
+                    res.1,
                     observe,
                 ),
             ),
@@ -594,16 +604,10 @@ impl<'a> BucketInterpreter<'a> {
         return match cond_bool_result {
             None => (None, None, env),
             Some(true) => {
-                if cfg!(debug_assertions) {
-                    println!("Running then branch");
-                }
                 let (ret, env) = self.execute_instructions(&true_branch, env, observe);
                 (ret, Some(true), env)
             }
             Some(false) => {
-                if cfg!(debug_assertions) {
-                    println!("Running else branch");
-                }
                 let (ret, env) = self.execute_instructions(&false_branch, env, observe);
                 (ret, Some(false), env)
             }
diff --git a/circuit_passes/src/bucket_interpreter/value.rs b/circuit_passes/src/bucket_interpreter/value.rs
index 3d6b1dd44..dcceb1335 100644
--- a/circuit_passes/src/bucket_interpreter/value.rs
+++ b/circuit_passes/src/bucket_interpreter/value.rs
@@ -1,20 +1,16 @@
-use std::fmt::{Display, Formatter};
+use std::fmt::{Debug, Display, Formatter};
 use compiler::intermediate_representation::ir_interface::{ValueBucket, ValueType};
 use compiler::num_bigint::BigInt;
 use compiler::num_traits::ToPrimitive;
 use compiler::intermediate_representation::new_id;
 use circom_algebra::modular_arithmetic;
-use circom_algebra::modular_arithmetic::ArithmeticError;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::value::Value::{KnownBigInt, KnownU32, Unknown};
 
-pub trait JoinSemiLattice {
-    fn join(&self, other: &Self) -> Self;
-}
-
 /// Poor man's lattice that gives up the moment values are not equal
 /// It's a join semi lattice with a top (Unknown)
 /// Not a complete lattice because there is no bottom
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd)]
 pub enum Value {
     Unknown,
     KnownU32(usize),
@@ -26,19 +22,17 @@ impl Display for Value {
         match self {
             Unknown => write!(f, "Unknown"),
             KnownU32(n) => write!(f, "{}", n),
-            KnownBigInt(n) => write!(f, "BigInt({})", n),
+            KnownBigInt(n) => write!(f, "{}", n),
         }
     }
 }
 
-impl JoinSemiLattice for Value {
-    /// a ⊔ b = a    iff a = b
-    /// a ⊔ b = UNK  otherwise
-    fn join(&self, other: &Self) -> Self {
-        if self == other {
-            self.clone()
-        } else {
-            Unknown
+impl Debug for Value {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Unknown => write!(f, "Unknown"),
+            KnownU32(n) => write!(f, "{}", n),
+            KnownBigInt(n) => write!(f, "BigInt({})", n),
         }
     }
 }
@@ -54,7 +48,7 @@ impl Value {
     pub fn get_bigint_as_string(&self) -> String {
         match self {
             KnownBigInt(b) => b.to_string(),
-            _ => panic!("Can't extract a string representation of a non big int"),
+            _ => panic!("Value is not a KnownBigInt! {:?}", self),
         }
     }
 
@@ -84,7 +78,7 @@ impl Value {
         }
     }
 
-    pub fn to_value_bucket(&self, constant_fields: &mut Vec<String>) -> ValueBucket {
+    pub fn to_value_bucket(&self, mem: &PassMemory) -> ValueBucket {
         match self {
             Unknown => panic!("Can't create a value bucket from an unknown value!"),
             KnownU32(n) => ValueBucket {
@@ -96,20 +90,15 @@ impl Value {
                 op_aux_no: 0,
                 value: *n,
             },
-            KnownBigInt(n) => {
-                let str_repr = n.to_string();
-                let idx = constant_fields.len();
-                constant_fields.push(str_repr);
-                ValueBucket {
-                    id: new_id(),
-                    source_file_id: None,
-                    line: 0,
-                    message_id: 0,
-                    parse_as: ValueType::BigInt,
-                    op_aux_no: 0,
-                    value: idx,
-                }
-            }
+            KnownBigInt(n) => ValueBucket {
+                id: new_id(),
+                source_file_id: None,
+                line: 0,
+                message_id: 0,
+                parse_as: ValueType::BigInt,
+                op_aux_no: 0,
+                value: mem.add_field_constant(n.to_string()),
+            },
         }
     }
 }
@@ -142,7 +131,7 @@ fn wrap_op_result(
     rhs: &Value,
     field: &BigInt,
     u32_op: impl Fn(&usize, &usize) -> usize,
-    bigint_op: impl Fn(&BigInt, &BigInt, &BigInt) -> Result<BigInt, ArithmeticError>,
+    bigint_op: impl Fn(&BigInt, &BigInt, &BigInt) -> Result<BigInt, modular_arithmetic::ArithmeticError>,
 ) -> Value {
     match (lhs, rhs) {
         (Unknown, _) => Unknown,
diff --git a/circuit_passes/src/passes/conditional_flattening.rs b/circuit_passes/src/passes/conditional_flattening.rs
index 84e32ed5d..56b9dce83 100644
--- a/circuit_passes/src/passes/conditional_flattening.rs
+++ b/circuit_passes/src/passes/conditional_flattening.rs
@@ -5,26 +5,28 @@ use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
-use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
+use super::{CircuitTransformationPass, GlobalPassData};
 
-pub struct ConditionalFlattening {
+pub struct ConditionalFlatteningPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
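-    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
+    // PassMemory handles its own interior mutability; only `replacements` needs a RefCell to be updated via `&self`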
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     replacements: RefCell<BTreeMap<BranchBucket, bool>>,
 }
 
-impl ConditionalFlattening {
-    pub fn new(prime: &String) -> Self {
-        ConditionalFlattening {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+impl<'d> ConditionalFlatteningPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
+        ConditionalFlatteningPass {
+            global_data,
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
     }
 }
 
-impl InterpreterObserver for ConditionalFlattening {
+impl InterpreterObserver for ConditionalFlatteningPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -74,8 +76,7 @@ impl InterpreterObserver for ConditionalFlattening {
     }
 
     fn on_branch_bucket(&self, bucket: &BranchBucket, env: &Env) -> bool {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
         let (_, cond_result, _) = interpreter.execute_conditional_bucket(
             &bucket.cond,
             &bucket.if_branch,
@@ -106,22 +107,22 @@ impl InterpreterObserver for ConditionalFlattening {
     }
 }
 
-impl CircuitTransformationPass for ConditionalFlattening {
+impl CircuitTransformationPass for ConditionalFlatteningPass<'_> {
     fn name(&self) -> &str {
         "ConditionalFlattening"
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self.global_data, self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn transform_branch_bucket(&self, bucket: &BranchBucket) -> InstructionPointer {
@@ -134,18 +135,20 @@ impl CircuitTransformationPass for ConditionalFlattening {
                 message_id: bucket.message_id,
                 body: code.clone(),
                 n_iters: 1,
+                label: format!("fold_{}", side),
             };
-            return self.transform_block_bucket(&block);
-        }
-        BranchBucket {
-            id: new_id(),
-            source_file_id: bucket.source_file_id,
-            line: bucket.line,
-            message_id: bucket.message_id,
-            cond: self.transform_instruction(&bucket.cond),
-            if_branch: self.transform_instructions(&bucket.if_branch),
-            else_branch: self.transform_instructions(&bucket.else_branch),
+            self.transform_block_bucket(&block)
+        } else {
+            BranchBucket {
+                id: new_id(),
+                source_file_id: bucket.source_file_id,
+                line: bucket.line,
+                message_id: bucket.message_id,
+                cond: self.transform_instruction(&bucket.cond),
+                if_branch: self.transform_instructions(&bucket.if_branch),
+                else_branch: self.transform_instructions(&bucket.else_branch),
+            }
+            .allocate()
         }
-        .allocate()
     }
 }
diff --git a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
index 795c89b00..94f3ab03c 100644
--- a/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
+++ b/circuit_passes/src/passes/deterministic_subcomponent_invocation.rs
@@ -5,20 +5,22 @@ use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::ir_interface::*;
 use compiler::intermediate_representation::ir_interface::StatusInput::{Last, NoLast};
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
-use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
+use super::{CircuitTransformationPass, GlobalPassData};
 
-pub struct DeterministicSubCmpInvokePass {
+pub struct DeterministicSubCmpInvokePass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
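-    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
+    // PassMemory handles its own interior mutability; only `replacements` needs a RefCell to be updated via `&self`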
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     replacements: RefCell<BTreeMap<AddressType, StatusInput>>,
 }
 
-impl DeterministicSubCmpInvokePass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> DeterministicSubCmpInvokePass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         DeterministicSubCmpInvokePass {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            global_data,
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
     }
@@ -35,8 +37,7 @@ impl DeterministicSubCmpInvokePass {
         } = address_type
         {
             let env = env.clone();
-            let mem = self.memory.borrow();
-            let interpreter = mem.build_interpreter(self);
+            let interpreter = self.memory.build_interpreter(self.global_data, self);
             let (addr, env) = interpreter.execute_instruction(cmp_address, env, false);
             let addr = addr
                 .expect("cmp_address instruction in SubcmpSignal must produce a value!")
@@ -47,7 +48,7 @@ impl DeterministicSubCmpInvokePass {
     }
 }
 
-impl InterpreterObserver for DeterministicSubCmpInvokePass {
+impl InterpreterObserver for DeterministicSubCmpInvokePass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -124,22 +125,22 @@ impl InterpreterObserver for DeterministicSubCmpInvokePass {
     }
 }
 
-impl CircuitTransformationPass for DeterministicSubCmpInvokePass {
+impl CircuitTransformationPass for DeterministicSubCmpInvokePass<'_> {
     fn name(&self) -> &str {
         "DeterministicSubCmpInvokePass"
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self.global_data, self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn transform_address_type(&self, address: &AddressType) -> AddressType {
@@ -150,6 +151,7 @@ impl CircuitTransformationPass for DeterministicSubCmpInvokePass {
                 uniform_parallel_value,
                 is_output,
                 input_information,
+                counter_override,
             } => AddressType::SubcmpSignal {
                 cmp_address: self.transform_instruction(&cmp_address),
                 uniform_parallel_value: uniform_parallel_value.clone(),
@@ -159,6 +161,7 @@ impl CircuitTransformationPass for DeterministicSubCmpInvokePass {
                 } else {
                     input_information.clone()
                 },
+                counter_override: *counter_override,
             },
             x => x.clone(),
         }
diff --git a/circuit_passes/src/passes/loop_unroll/body_extractor.rs b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
new file mode 100644
index 000000000..dbbcaf250
--- /dev/null
+++ b/circuit_passes/src/passes/loop_unroll/body_extractor.rs
@@ -0,0 +1,507 @@
+use std::cell::{RefCell, Ref};
+use std::collections::{BTreeMap, HashMap};
+use std::vec;
+use indexmap::{IndexMap, IndexSet};
+use code_producers::llvm_elements::fr::*;
+use compiler::circuit_design::function::{FunctionCodeInfo, FunctionCode};
+use compiler::hir::very_concrete_program::Param;
+use compiler::intermediate_representation::{
+    BucketId, InstructionList, InstructionPointer, new_id, UpdateId,
+};
+use compiler::intermediate_representation::ir_interface::*;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::LOOP_BODY_FN_PREFIX;
+use crate::passes::loop_unroll::extracted_location_updater::ExtractedFunctionLocationUpdater;
+use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
+use super::new_u32_value;
+
+pub type FuncArgIdx = usize;
+pub type AddressOffset = usize;
+pub type UnrolledIterLvars = BTreeMap<usize, Value>;
+pub type ToOriginalLocation = HashMap<FuncArgIdx, (AddressType, AddressOffset)>;
+
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub enum ArgIndex {
+    Signal(FuncArgIdx),
+    SubCmp { signal: FuncArgIdx, arena: FuncArgIdx, counter: FuncArgIdx },
+}
+
+impl ArgIndex {
+    pub fn get_signal_idx(&self) -> FuncArgIdx {
+        match *self {
+            ArgIndex::Signal(signal) => signal,
+            ArgIndex::SubCmp { signal, .. } => signal,
+        }
+    }
+}
+
+/// Need this structure to skip id/metadata fields in ValueBucket when using as map key.
+/// Also, the input/output stuff doesn't matter since the extra arguments that are added
+/// based on this are only used to trigger generation of the run function after all of
+/// the inputs have been assigned.
+#[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
+struct SubcmpSignalHashFix {
+    cmp_address_parse_as: ValueType,
+    cmp_address_op_aux_no: usize,
+    cmp_address_value: usize,
+    uniform_parallel_value: Option<bool>,
+    counter_override: bool,
+}
+
+impl SubcmpSignalHashFix {
+    fn convert(addr: &AddressType) -> SubcmpSignalHashFix {
+        if let AddressType::SubcmpSignal {
+            cmp_address,
+            uniform_parallel_value,
+            counter_override,
+            ..
+        } = addr
+        {
+            if let Instruction::Value(ValueBucket { parse_as, op_aux_no, value, .. }) =
+                **cmp_address
+            {
+                return SubcmpSignalHashFix {
+                    cmp_address_parse_as: parse_as,
+                    cmp_address_op_aux_no: op_aux_no,
+                    cmp_address_value: value,
+                    uniform_parallel_value: *uniform_parallel_value, // Copy: deref, no clone
+                    counter_override: *counter_override,
+                };
+            }
+        }
+        panic!("improper AddressType given") // needs SubcmpSignal with a ValueBucket cmp_address
+    }
+}
+
+struct ExtraArgsResult {
+    bucket_to_itr_to_ref: HashMap<BucketId, Vec<Option<(AddressType, AddressOffset)>>>,
+    bucket_to_args: IndexMap<BucketId, ArgIndex>,
+    num_args: usize,
+}
+
+impl ExtraArgsResult {
+    fn get_passing_refs_for_itr(
+        &self,
+        iter_num: usize,
+    ) -> Vec<(&Option<(AddressType, AddressOffset)>, ArgIndex)> {
+        self.bucket_to_itr_to_ref
+            .iter()
+            .map(|(k, v)| (&v[iter_num], self.bucket_to_args[k]))
+            .collect()
+    }
+
+    fn get_reverse_passing_refs_for_itr(&self, iter_num: usize) -> ToOriginalLocation {
+        self.bucket_to_itr_to_ref.iter().fold(ToOriginalLocation::new(), |mut acc, (k, v)| {
+            if let Some((addr_ty, addr_offset)) = v[iter_num].as_ref() {
+                acc.insert(
+                    self.bucket_to_args[k].get_signal_idx(),
+                    (addr_ty.clone(), *addr_offset),
+                );
+            }
+            acc
+        })
+    }
+}
+
+#[derive(Clone, Debug, Eq, PartialEq, Default)]
+pub struct LoopBodyExtractor {
+    new_body_functions: RefCell<Vec<FunctionCode>>,
+}
+
+impl LoopBodyExtractor {
+    pub fn get_new_functions(&self) -> Ref<Vec<FunctionCode>> {
+        self.new_body_functions.borrow()
+    }
+
+    pub fn extract<'a>(
+        &self,
+        bucket: &LoopBucket,
+        recorder: &'a EnvRecorder<'a, '_>,
+        unrolled: &mut InstructionList,
+    ) {
+        assert!(bucket.body.len() > 1);
+        let extra_arg_info = Self::compute_extra_args(&recorder);
+        let name = self.build_new_body(
+            bucket,
+            extra_arg_info.bucket_to_args.clone(),
+            extra_arg_info.num_args,
+        );
+        for iter_num in 0..recorder.get_iter() {
+            // NOTE: CallBucket arguments must use a LoadBucket to reference the necessary pointers
+            //  within the current body. However, it doesn't actually need to generate a load
+            //  instruction to use these pointers as parameters to the function so we must use the
+            //  `bounded_fn` field of the LoadBucket to specify the identity function to perform
+            //  the "loading" (but really it just returns the pointer that was passed in).
+            let mut args = Self::new_filled_vec(
+                extra_arg_info.num_args,
+                NopBucket { id: 0 }.allocate(), // garbage fill
+            );
+            // Parameter for local vars
+            args[0] = Self::new_storage_ptr_ref(bucket, AddressType::Variable);
+            // Parameter for signals/arena
+            args[1] = Self::new_storage_ptr_ref(bucket, AddressType::Signal);
+            // Additional parameters for subcmps and variant array indexing within the loop
+            for (loc, ai) in extra_arg_info.get_passing_refs_for_itr(iter_num) {
+                match loc {
+                    None => match ai {
+                        ArgIndex::Signal(signal) => {
+                            args[signal] = Self::new_null_ptr(bucket, FR_NULL_I256_PTR);
+                        }
+                        ArgIndex::SubCmp { signal, arena, counter } => {
+                            args[signal] = Self::new_null_ptr(bucket, FR_NULL_I256_PTR);
+                            args[arena] = Self::new_null_ptr(bucket, FR_NULL_I256_ARR_PTR);
+                            args[counter] = Self::new_null_ptr(bucket, FR_NULL_I256_PTR);
+                        }
+                    },
+                    Some((at, val)) => match ai {
+                        ArgIndex::Signal(signal) => {
+                            args[signal] =
+                                Self::new_indexed_storage_ptr_ref(bucket, at.clone(), *val)
+                        }
+                        ArgIndex::SubCmp { signal, arena, counter } => {
+                            // Pass specific signal referenced
+                            args[signal] =
+                                Self::new_indexed_storage_ptr_ref(bucket, at.clone(), *val);
+                            // Pass entire subcomponent arena for calling the 'template_run' function
+                            args[arena] = Self::new_storage_ptr_ref(bucket, at.clone());
+                            // Pass subcomponent counter reference
+                            if let AddressType::SubcmpSignal { cmp_address, .. } = &at {
+                                //TODO: may only need to add this when is_output=true but have to skip adding the Param too in that case.
+                                args[counter] = Self::new_subcmp_counter_storage_ptr_ref(
+                                    bucket,
+                                    cmp_address.clone(),
+                                );
+                            } else {
+                                unreachable!()
+                            }
+                        }
+                    },
+                }
+            }
+            unrolled.push(
+                CallBucket {
+                    id: new_id(),
+                    source_file_id: bucket.source_file_id,
+                    line: bucket.line,
+                    message_id: bucket.message_id,
+                    symbol: name.clone(),
+                    return_info: ReturnType::Intermediate { op_aux_no: 0 },
+                    arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+                    argument_types: vec![], // LLVM IR generation doesn't use this field
+                    arguments: args,
+                }
+                .allocate(),
+            );
+
+            recorder.record_reverse_arg_mapping(
+                name.clone(),
+                recorder.get_vals_per_iter().get(&iter_num).unwrap().env_at_header.get_vars_sort(),
+                extra_arg_info.get_reverse_passing_refs_for_itr(iter_num),
+            );
+        }
+    }
+
+    fn build_new_body(
+        &self,
+        bucket: &LoopBucket,
+        mut bucket_to_args: IndexMap<BucketId, ArgIndex>,
+        num_args: usize,
+    ) -> String {
+        // NOTE: must create parameter list before 'bucket_to_args' is modified
+        // Since the ArgIndex instances could have indices in any random order,
+        //  create the vector of required size and then set elements by index.
+        let mut params = Self::new_filled_vec(
+            num_args,
+            Param { name: String::from("EMPTY"), length: vec![usize::MAX] },
+        );
+        params[0] = Param { name: String::from("lvars"), length: vec![0] };
+        params[1] = Param { name: String::from("signals"), length: vec![0] };
+        for (i, arg_index) in bucket_to_args.values().enumerate() {
+            match arg_index {
+                ArgIndex::Signal(signal) => {
+                    //Single signal uses scalar pointer
+                    params[*signal] = Param { name: format!("fix_{}", i), length: vec![] };
+                }
+                ArgIndex::SubCmp { signal, arena, counter } => {
+                    //Subcomponent arena requires array pointer but the others are scalar
+                    params[*arena] = Param { name: format!("sub_{}", i), length: vec![0] };
+                    params[*signal] = Param { name: format!("subfix_{}", i), length: vec![] };
+                    params[*counter] = Param { name: format!("subc_{}", i), length: vec![] };
+                }
+            }
+        }
+
+        // Copy loop body and add a "return void" at the end
+        let mut new_body = vec![];
+        for s in &bucket.body {
+            let mut copy: InstructionPointer = s.clone();
+            //Traverse each cloned statement before calling `update_id()` and replace the
+            //  old location reference with reference to the proper argument. Mappings are
+            //  removed as they are processed so no change is needed once the map is empty.
+            let suffix = if !bucket_to_args.is_empty() {
+                let mut upd = ExtractedFunctionLocationUpdater::new();
+                upd.check_instruction(&mut copy, &mut bucket_to_args);
+                upd.insert_after
+            } else {
+                InstructionList::default()
+            };
+            copy.update_id();
+            new_body.push(copy);
+            for s in suffix {
+                new_body.push(s);
+            }
+        }
+        assert!(bucket_to_args.is_empty());
+        new_body.push(
+            ReturnBucket {
+                id: new_id(),
+                source_file_id: bucket.source_file_id,
+                line: bucket.line,
+                message_id: bucket.message_id,
+                with_size: usize::MAX, // size > 1 will produce "return void" LLVM instruction
+                value: NopBucket { id: new_id() }.allocate(),
+            }
+            .allocate(),
+        );
+        // Create new function to hold the copied body
+        // NOTE: This name must start with `GENERATED_FN_PREFIX` (which is the prefix
+        //  of `LOOP_BODY_FN_PREFIX`) so that `ExtractedFunctionCtx` will be used.
+        let func_name = format!("{}{}", LOOP_BODY_FN_PREFIX, new_id());
+        let new_func = Box::new(FunctionCodeInfo {
+            source_file_id: bucket.source_file_id,
+            line: bucket.line,
+            name: func_name.clone(),
+            header: func_name.clone(),
+            body: new_body,
+            params,
+            returns: vec![], // void return type on the function
+            ..FunctionCodeInfo::default()
+        });
+        // Store the function to be transformed and added to circuit later
+        self.new_body_functions.borrow_mut().push(new_func);
+        func_name
+    }
+
+    fn new_custom_fn_load_bucket(
+        bucket: &dyn ObtainMeta,
+        load_fun: &str,
+        addr_type: AddressType,
+        location: InstructionPointer,
+    ) -> InstructionPointer {
+        LoadBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            address_type: addr_type,
+            src: LocationRule::Indexed { location, template_header: None },
+            bounded_fn: Some(String::from(load_fun)),
+        }
+        .allocate()
+    }
+
+    fn new_storage_ptr_ref(bucket: &dyn ObtainMeta, addr_type: AddressType) -> InstructionPointer {
+        Self::new_custom_fn_load_bucket(
+            bucket,
+            FR_IDENTITY_ARR_PTR,
+            addr_type,
+            new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+        )
+    }
+
+    //NOTE: When the 'bounded_fn' for LoadBucket is Some(_), the index parameter
+    //  is ignored so we must instead use `FR_INDEX_ARR_PTR` to apply the index.
+    //  Uses of that function can be inlined later.
+    // NOTE: Must start with `GENERATED_FN_PREFIX` to use `ExtractedFunctionCtx`
+    fn new_indexed_storage_ptr_ref(
+        bucket: &dyn ObtainMeta,
+        addr_type: AddressType,
+        index: AddressOffset,
+    ) -> InstructionPointer {
+        CallBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            symbol: String::from(FR_INDEX_ARR_PTR),
+            return_info: ReturnType::Intermediate { op_aux_no: 0 },
+            arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+            argument_types: vec![], // LLVM IR generation doesn't use this field
+            arguments: vec![
+                Self::new_storage_ptr_ref(bucket, addr_type),
+                new_u32_value(bucket, index),
+            ],
+        }
+        .allocate()
+    }
+
+    fn new_subcmp_counter_storage_ptr_ref(
+        bucket: &dyn ObtainMeta,
+        sub_cmp_id: InstructionPointer,
+    ) -> InstructionPointer {
+        Self::new_custom_fn_load_bucket(
+            bucket,
+            FR_PTR_CAST_I32_I256,
+            AddressType::SubcmpSignal {
+                cmp_address: sub_cmp_id,
+                uniform_parallel_value: Option::None,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+                counter_override: true,
+            },
+            new_u32_value(bucket, usize::MAX), //index is ignored for these
+        )
+    }
+
+    fn new_null_ptr(bucket: &dyn ObtainMeta, null_fun: &str) -> InstructionPointer {
+        CallBucket {
+            id: new_id(),
+            source_file_id: bucket.get_source_file_id().clone(),
+            line: bucket.get_line(),
+            message_id: bucket.get_message_id(),
+            symbol: String::from(null_fun),
+            return_info: ReturnType::Intermediate { op_aux_no: 0 },
+            arena_size: 0, // size 0 indicates arguments should not be placed into an arena
+            argument_types: vec![], // LLVM IR generation doesn't use this field
+            arguments: vec![],
+        }
+        .allocate()
+    }
+
+    fn all_same<T>(data: T) -> bool
+    where
+        T: Iterator,
+        T::Item: PartialEq,
+    {
+        data.fold((true, None), {
+            |acc, elem| {
+                if acc.1.is_some() {
+                    (acc.0 && (acc.1.unwrap() == elem), Some(elem))
+                } else {
+                    (true, Some(elem))
+                }
+            }
+        })
+        .0
+    }
+
+    /// The ideal scenario for extracting the loop body into a new function is to only
+    /// need 2 function arguments, lvars and signals. However, we want to avoid variable
+    /// indexing within the extracted function so we include extra pointer arguments
+    /// that allow the indexing to happen in the original body where the loop will be
+    /// unrolled and the indexing will become known constant values. This computes the
+    /// extra arguments that will be needed.
+    fn compute_extra_args<'a>(recorder: &'a EnvRecorder<'a, '_>) -> ExtraArgsResult {
+        // Table structure indexed first by load/store BucketId, then by iteration number.
+        //  View the first (BucketId) as columns and the second (iteration number) as rows.
+        //  The data reference is wrapped in Option to allow for some iterations that don't
+        //  execute a specific bucket due to conditional branches within the loop body.
+        //  When comparing values across iterations, ignore those cases where there is no
+        //  value for a certain iteration and only check among those iterations that have a
+        //  value because it doesn't matter what parameter is passed in for those iterations
+        //  that do not execute that specific bucket. This is the reason it was important to
+        //  store Unknown values in the `loadstore_to_index` index as well, so they are not
+        //  confused with values that simply don't exist.
+        let mut bucket_to_itr_to_ref: HashMap<BucketId, Vec<Option<(AddressType, AddressOffset)>>> =
+            HashMap::new();
+        // Extra function argument(s) assigned to each load/store BucketId that needs one.
+        let mut bucket_to_args: IndexMap<BucketId, ArgIndex> = IndexMap::new();
+        let vpi = recorder.get_vals_per_iter();
+        // NOTE: starts at 2 because the current component's signal arena and lvars are first.
+        let mut next_idx: FuncArgIdx = 2;
+        // First step is to collect all location references into the 'bucket_to_itr_to_ref' table.
+        // NOTE: collect to IndexSet to preserve insertion order to stabilize test output.
+        let all_loadstore_bucket_ids: IndexSet<&BucketId> =
+            vpi.values().flat_map(|x| x.loadstore_to_index.keys()).collect();
+        for id in all_loadstore_bucket_ids {
+            let column = bucket_to_itr_to_ref.entry(*id).or_default();
+            for iter_num in 0..recorder.get_iter() {
+                let temp = vpi[&iter_num].loadstore_to_index.get(id);
+                // ASSERT: index values are known in every (available) iteration
+                assert!(temp.is_none() || !temp.unwrap().1.is_unknown());
+                column.push(temp.map(|(a, v)| (a.clone(), v.get_u32())));
+            }
+            // ASSERT: same AddressType kind for this bucket in every (available) iteration
+            assert!(Self::all_same(
+                column.iter().filter_map(|x| x.as_ref()).map(|x| std::mem::discriminant(&x.0))
+            ));
+
+            // Check if the computed index value for this bucket is the same across all iterations (where it is
+            //  not None, see earlier comment). If it is not, then an extra function argument is needed for it.
+            //  Actually, check not only the computed index Value but the AddressType as well to capture when
+            //  it's a SubcmpSignal referencing a different subcomponent (the AddressType::cmp_address field
+            //  was also interpreted within the EnvRecorder so this comparison will be accurate).
+            if !Self::all_same(column.iter().filter_map(|x| x.as_ref())) {
+                bucket_to_args.insert(*id, ArgIndex::Signal(next_idx));
+                next_idx += 1;
+            }
+        }
+        //ASSERT: All columns have the same length (i.e. the number of iterations)
+        assert!(bucket_to_itr_to_ref.values().all(|x| x.len() == recorder.get_iter()));
+
+        // Also, if it's a subcomponent reference, then extra arguments are needed for its
+        //  signal arena and counter (because subcomponents are not included by default like
+        //  the current component's signal arena and lvars are).
+        // Find groups of BucketId that use the same SubcmpSignal (to reduce number of arguments).
+        //  A group must have this same property in all iterations in order to be safe to combine.
+        let mut safe_groups: BTreeMap<SubcmpSignalHashFix, IndexSet<BucketId>> = Default::default();
+        for iter_num in 0..recorder.get_iter() {
+            let grps: BTreeMap<SubcmpSignalHashFix, IndexSet<BucketId>> = bucket_to_itr_to_ref
+                .iter()
+                .map(|(k, col)| (k, &col[iter_num]))
+                .fold(BTreeMap::new(), |mut r, (b, a)| {
+                    if let Some((at, _)) = a {
+                        if let AddressType::SubcmpSignal { .. } = at {
+                            r.entry(SubcmpSignalHashFix::convert(&at)).or_default().insert(*b);
+                        }
+                    }
+                    r
+                });
+            // Assume all groups are safe until proven otherwise. So if it's empty at any point, just quit.
+            if iter_num == 0 {
+                safe_groups = grps;
+            } else {
+                safe_groups.retain(|_, v| grps.values().any(|x| x == v));
+            }
+            if safe_groups.is_empty() {
+                break;
+            }
+        }
+        for (_, buckets) in safe_groups.iter() {
+            let arena_idx: FuncArgIdx = next_idx;
+            let counter_idx: FuncArgIdx = next_idx + 1;
+            next_idx += 2;
+            for b in buckets {
+                if let Some(ArgIndex::Signal(sig)) = bucket_to_args.get(b) {
+                    bucket_to_args.insert(
+                        *b,
+                        ArgIndex::SubCmp { signal: *sig, arena: arena_idx, counter: counter_idx },
+                    );
+                } else {
+                    //TODO: What to do when the signal index within the subcomp does not vary
+                    //  across iterations? Adding a parameter anyway doesn't hurt, so that's
+                    //  the approach taken for now.
+                    bucket_to_args.insert(
+                        *b,
+                        ArgIndex::SubCmp {
+                            signal: next_idx,
+                            arena: arena_idx,
+                            counter: counter_idx,
+                        },
+                    );
+                    next_idx += 1;
+                }
+            }
+        }
+
+        //Keep only the table columns where extra parameters are necessary
+        bucket_to_itr_to_ref.retain(|k, _| bucket_to_args.contains_key(k));
+        ExtraArgsResult { bucket_to_itr_to_ref, bucket_to_args, num_args: next_idx }
+    }
+
+    fn new_filled_vec<T: Clone>(new_len: usize, value: T) -> Vec<T> {
+        let mut result = Vec::with_capacity(new_len);
+        result.resize(new_len, value);
+        result
+    }
+}
diff --git a/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
new file mode 100644
index 000000000..1756b410d
--- /dev/null
+++ b/circuit_passes/src/passes/loop_unroll/extracted_location_updater.rs
@@ -0,0 +1,272 @@
+use indexmap::IndexMap;
+use code_producers::llvm_elements::fr::FR_IDENTITY_ARR_PTR;
+use compiler::intermediate_representation::{BucketId, InstructionPointer, new_id};
+use compiler::intermediate_representation::ir_interface::*;
+use super::body_extractor::ArgIndex;
+use super::new_u32_value;
+
+pub struct ExtractedFunctionLocationUpdater {
+    pub insert_after: InstructionList,
+}
+
+impl ExtractedFunctionLocationUpdater {
+    pub fn new() -> ExtractedFunctionLocationUpdater {
+        ExtractedFunctionLocationUpdater { insert_after: Default::default() }
+    }
+
+    fn check_load_bucket(
+        &mut self,
+        bucket: &mut LoadBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
+            // Update the location information to reference the argument
+            //NOTE: This can't use AddressType::Variable or AddressType::Signal
+            //  because ExtractedFunctionLLVMIRProducer references the first two
+            //  parameters with those. So this has to use SubcmpSignal (it should
+            //  work fine because subcomps will also just be additional params).
+            bucket.address_type = AddressType::SubcmpSignal {
+                cmp_address: new_u32_value(bucket, ai.get_signal_idx()),
+                uniform_parallel_value: None,
+                counter_override: false,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+            };
+            bucket.src = LocationRule::Indexed {
+                location: new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+                template_header: None,
+            };
+        } else {
+            // If not replacing, check deeper in the AddressType and LocationRule
+            self.check_address_type(&mut bucket.address_type, bucket_arg_order);
+            self.check_location_rule(&mut bucket.src, bucket_arg_order);
+        }
+    }
+
+    fn check_store_bucket(
+        &mut self,
+        bucket: &mut StoreBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        // Check the source/RHS of the store in either case
+        self.check_instruction(&mut bucket.src, bucket_arg_order);
+        // If an extra argument was assigned to this bucket, redirect its destination to it.
+        if let Some(ai) = bucket_arg_order.remove(&bucket.id) {
+            // If needed, add a StoreBucket to 'insert_after' that will call the template_run function.
+            // NOTE: This must happen before the modification step so it can use existing values from the bucket.
+            if let ArgIndex::SubCmp { arena, .. } = ai {
+                self.insert_after.push(
+                    StoreBucket {
+                        id: new_id(),
+                        source_file_id: bucket.source_file_id.clone(),
+                        line: bucket.line,
+                        message_id: bucket.message_id,
+                        context: bucket.context.clone(),
+                        dest_is_output: bucket.dest_is_output,
+                        dest_address_type: AddressType::SubcmpSignal {
+                            cmp_address: new_u32_value(bucket, arena),
+                            uniform_parallel_value: None,
+                            counter_override: false,
+                            is_output: false,
+                            //TODO: Not sure what to put here. If I put Unknown (assuming the later pass
+                            //  would correct) it crashes somewhere. What I really need is Last in the
+                            //  proper place to make it generate the *_run function at the right time
+                            //  but NoLast in locations prior to that (I think). Why isn't Unknown handled
+                            //  by a later pass (deterministic subcomp pass or something)? Always using
+                            //  Last here could result in the run function being called too soon.
+                            //SEE: circom/tests/subcmps/subcmps0C.circom
+                            input_information: InputInformation::Input {
+                                status: StatusInput::Last,
+                            },
+                        },
+                        dest: LocationRule::Indexed {
+                            location: new_u32_value(bucket, 0), //the value here is ignored by the 'bounded_fn' below
+                            template_header: match &bucket.dest {
+                                LocationRule::Indexed { template_header, .. } => {
+                                    template_header.clone()
+                                }
+                                LocationRule::Mapped { .. } => todo!(),
+                            },
+                        },
+                        src: new_u32_value(bucket, 0), //the value here is ignored at runtime
+                        bounded_fn: Some(String::from(FR_IDENTITY_ARR_PTR)), //NOTE: doesn't have enough arguments but it works out
+                    }
+                    .allocate(),
+                );
+                // NOTE: Not adding counter for now because it shouldn't be needed anyway and it's more work to add.
+                //  The best approach would probably be to generate Load+Compute+Store (based on what StoreBucket
+                //  would normally generate for it) in an "insert_before" list just like the "insert_after" list.
+            }
+
+            //Transform this bucket into the normal fixed-index signal reference
+            bucket.dest_address_type = AddressType::SubcmpSignal {
+                cmp_address: new_u32_value(bucket, ai.get_signal_idx()),
+                uniform_parallel_value: None,
+                counter_override: false,
+                is_output: false,
+                input_information: InputInformation::NoInput,
+            };
+            bucket.dest = LocationRule::Indexed {
+                location: new_u32_value(bucket, 0), //use index 0 to ref the entire storage array
+                template_header: None,
+            };
+        } else {
+            // If not replacing, check deeper in the AddressType and LocationRule
+            self.check_address_type(&mut bucket.dest_address_type, bucket_arg_order);
+            self.check_location_rule(&mut bucket.dest, bucket_arg_order);
+        }
+    }
+
+    fn check_location_rule(
+        &mut self,
+        location_rule: &mut LocationRule,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        match location_rule {
+            LocationRule::Indexed { location, .. } => {
+                self.check_instruction(location, bucket_arg_order);
+            }
+            LocationRule::Mapped { .. } => unreachable!(),
+        }
+    }
+
+    fn check_address_type(
+        &mut self,
+        addr_type: &mut AddressType,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        if let AddressType::SubcmpSignal { cmp_address, .. } = addr_type {
+            self.check_instruction(cmp_address, bucket_arg_order);
+        }
+    }
+
+    fn check_compute_bucket(
+        &mut self,
+        bucket: &mut ComputeBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instructions(&mut bucket.stack, bucket_arg_order);
+    }
+
+    fn check_assert_bucket(
+        &mut self,
+        bucket: &mut AssertBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instruction(&mut bucket.evaluate, bucket_arg_order);
+    }
+
+    fn check_loop_bucket(
+        &mut self,
+        bucket: &mut LoopBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instruction(&mut bucket.continue_condition, bucket_arg_order);
+        self.check_instructions(&mut bucket.body, bucket_arg_order);
+    }
+
+    fn check_create_cmp_bucket(
+        &mut self,
+        bucket: &mut CreateCmpBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instruction(&mut bucket.sub_cmp_id, bucket_arg_order);
+    }
+
+    fn check_constraint_bucket(
+        &mut self,
+        bucket: &mut ConstraintBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instruction(
+            match bucket {
+                ConstraintBucket::Substitution(i) => i,
+                ConstraintBucket::Equality(i) => i,
+            },
+            bucket_arg_order,
+        );
+    }
+
+    fn check_block_bucket(
+        &mut self,
+        bucket: &mut BlockBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instructions(&mut bucket.body, bucket_arg_order);
+    }
+
+    fn check_call_bucket(
+        &mut self,
+        bucket: &mut CallBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instructions(&mut bucket.arguments, bucket_arg_order);
+    }
+
+    fn check_branch_bucket(
+        &mut self,
+        bucket: &mut BranchBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instruction(&mut bucket.cond, bucket_arg_order);
+        self.check_instructions(&mut bucket.if_branch, bucket_arg_order);
+        self.check_instructions(&mut bucket.else_branch, bucket_arg_order);
+    }
+
+    fn check_return_bucket(
+        &mut self,
+        bucket: &mut ReturnBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        self.check_instruction(&mut bucket.value, bucket_arg_order);
+    }
+
+    fn check_log_bucket(
+        &mut self,
+        bucket: &mut LogBucket,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        for arg in &mut bucket.argsprint {
+            if let LogBucketArg::LogExp(i) = arg {
+                self.check_instruction(i, bucket_arg_order);
+            }
+        }
+    }
+
+    //Nothing to do
+    fn check_value_bucket(&mut self, _: &mut ValueBucket, _: &mut IndexMap<BucketId, ArgIndex>) {}
+    fn check_nop_bucket(&mut self, _: &mut NopBucket, _: &mut IndexMap<BucketId, ArgIndex>) {}
+
+    fn check_instructions(
+        &mut self,
+        insts: &mut Vec<InstructionPointer>,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        for i in insts {
+            self.check_instruction(i, bucket_arg_order);
+        }
+    }
+
+    pub fn check_instruction(
+        &mut self,
+        inst: &mut InstructionPointer,
+        bucket_arg_order: &mut IndexMap<BucketId, ArgIndex>,
+    ) {
+        match inst.as_mut() {
+            Instruction::Value(ref mut b) => self.check_value_bucket(b, bucket_arg_order),
+            Instruction::Load(ref mut b) => self.check_load_bucket(b, bucket_arg_order),
+            Instruction::Store(ref mut b) => self.check_store_bucket(b, bucket_arg_order),
+            Instruction::Compute(ref mut b) => self.check_compute_bucket(b, bucket_arg_order),
+            Instruction::Call(ref mut b) => self.check_call_bucket(b, bucket_arg_order),
+            Instruction::Branch(ref mut b) => self.check_branch_bucket(b, bucket_arg_order),
+            Instruction::Return(ref mut b) => self.check_return_bucket(b, bucket_arg_order),
+            Instruction::Assert(ref mut b) => self.check_assert_bucket(b, bucket_arg_order),
+            Instruction::Log(ref mut b) => self.check_log_bucket(b, bucket_arg_order),
+            Instruction::Loop(ref mut b) => self.check_loop_bucket(b, bucket_arg_order),
+            Instruction::CreateCmp(ref mut b) => self.check_create_cmp_bucket(b, bucket_arg_order),
+            Instruction::Constraint(ref mut b) => self.check_constraint_bucket(b, bucket_arg_order),
+            Instruction::Block(ref mut b) => self.check_block_bucket(b, bucket_arg_order),
+            Instruction::Nop(ref mut b) => self.check_nop_bucket(b, bucket_arg_order),
+        }
+    }
+}
diff --git a/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
new file mode 100644
index 000000000..2c79f94fd
--- /dev/null
+++ b/circuit_passes/src/passes/loop_unroll/loop_env_recorder.rs
@@ -0,0 +1,278 @@
+use std::cell::{RefCell, Ref};
+use std::collections::BTreeMap;
+use std::fmt::{Debug, Formatter};
+use indexmap::IndexMap;
+use compiler::intermediate_representation::BucketId;
+use compiler::intermediate_representation::ir_interface::*;
+use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
+use crate::bucket_interpreter::observer::InterpreterObserver;
+use crate::bucket_interpreter::value::Value;
+use crate::passes::GlobalPassData;
+use super::body_extractor::{UnrolledIterLvars, ToOriginalLocation};
+
+/// Holds values of index variables at array loads/stores within a loop
+pub struct VariableValues<'a> {
+    pub env_at_header: Env<'a>,
+    /// The key is the ID of the load/store bucket where the reference is located.
+    /// NOTE: uses IndexMap to preserve insertion order to stabilize test output.
+    pub loadstore_to_index: IndexMap<BucketId, (AddressType, Value)>,
+}
+
+impl<'a> VariableValues<'a> {
+    pub fn new(env_at_header: Env<'a>) -> Self {
+        VariableValues { env_at_header, loadstore_to_index: Default::default() }
+    }
+}
+
+impl Debug for VariableValues<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let print_header_env = false;
+        if print_header_env {
+            write!(
+                f,
+                "\n{{\n env_at_header = {}\n loadstore_to_index = {:?}\n}}",
+                self.env_at_header, self.loadstore_to_index
+            )
+        } else {
+            write!(f, "\n  loadstore_to_index = {:?}\n", self.loadstore_to_index)
+        }
+    }
+}
+
+pub struct EnvRecorder<'a, 'd> {
+    global_data: &'d RefCell<GlobalPassData>,
+    mem: &'a PassMemory,
+    // NOTE: RefCell is needed here because the instance of this struct is borrowed by
+    //  the main interpreter while we also need to mutate these internal structures.
+    current_iter_num: RefCell<usize>,
+    safe_to_move: RefCell<bool>,
+    //NOTE: use BTreeMap instead of HashMap for consistent ordering of args in test cases
+    vals_per_iteration: RefCell<BTreeMap<usize, VariableValues<'a>>>, // key is iteration number
+}
+
+impl Debug for EnvRecorder<'_, '_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "\n current_iter_num = {}\n safe_to_move = {:?}\n vals_per_iteration = {:?}",
+            self.current_iter_num.borrow(),
+            self.safe_to_move.borrow(),
+            self.vals_per_iteration.borrow(),
+        )
+    }
+}
+
+impl<'a, 'd> EnvRecorder<'a, 'd> {
+    pub fn new(global_data: &'d RefCell<GlobalPassData>, mem: &'a PassMemory) -> Self {
+        EnvRecorder {
+            global_data,
+            mem,
+            vals_per_iteration: Default::default(),
+            current_iter_num: RefCell::new(0),
+            safe_to_move: RefCell::new(true),
+        }
+    }
+
+    pub fn get_vals_per_iter(&self) -> Ref<BTreeMap<usize, VariableValues<'a>>> {
+        self.vals_per_iteration.borrow()
+    }
+
+    pub fn is_safe_to_move(&self) -> bool {
+        *self.safe_to_move.borrow()
+    }
+
+    pub fn increment_iter(&self) {
+        *self.current_iter_num.borrow_mut() += 1;
+    }
+
+    pub fn get_iter(&self) -> usize {
+        *self.current_iter_num.borrow()
+    }
+
+    pub fn record_env_at_header(&self, env: Env<'a>) {
+        let iter = self.get_iter();
+        assert!(!self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration.borrow_mut().insert(iter, VariableValues::new(env));
+    }
+
+    pub fn get_header_env_clone(&self) -> Env {
+        let iter = self.get_iter();
+        assert!(self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration.borrow().get(&iter).unwrap().env_at_header.clone()
+    }
+
+    pub fn record_reverse_arg_mapping(
+        &self,
+        extract_func: String,
+        iter_env: UnrolledIterLvars,
+        value: ToOriginalLocation,
+    ) {
+        self.global_data
+            .borrow_mut()
+            .extract_func_orig_loc
+            .entry(extract_func)
+            .or_default()
+            .insert(iter_env, value);
+    }
+
+    fn record_memloc_at_bucket(&self, bucket_id: &BucketId, addr_ty: AddressType, val: Value) {
+        let iter = self.get_iter();
+        assert!(self.vals_per_iteration.borrow().contains_key(&iter));
+        self.vals_per_iteration
+            .borrow_mut()
+            .get_mut(&iter)
+            .unwrap()
+            .loadstore_to_index
+            .insert(*bucket_id, (addr_ty, val));
+    }
+
+    fn compute_index_from_inst(&self, env: &Env, location: &InstructionPointer) -> Value {
+        // Evaluate the index using the current environment and using the environment from the
+        //  loop header. If either is Unknown or they do not give the same value, then it is
+        //  not safe to move the loop body to another function because the index computation may
+        //  not give the same result when done at the call site, outside of the new function.
+        let interp = self.mem.build_interpreter(self.global_data, self);
+        let (idx_loc, _) = interp.execute_instruction(location, env.clone(), false);
+        if let Some(idx_loc) = idx_loc {
+            let (idx_header, _) =
+                interp.execute_instruction(location, self.get_header_env_clone(), false);
+            if let Some(idx_header) = idx_header {
+                if Value::eq(&idx_header, &idx_loc) {
+                    return idx_loc;
+                }
+            }
+        }
+        Value::Unknown
+    }
+
+    fn compute_index_from_rule(&self, env: &Env, loc: &LocationRule) -> Value {
+        match loc {
+            LocationRule::Mapped { .. } => {
+                //TODO: It's not an array index in this case, at least not immediately but I think it can
+                //  ultimately be converted to one because the subcmp storage is an array of values. Is
+                //  that value known now? Do I also need the AddressType to compute the correct index?
+                //SEE: https://veridise.atlassian.net/browse/VAN-704
+                Value::Unknown
+            }
+            LocationRule::Indexed { location, .. } => self.compute_index_from_inst(env, location),
+        }
+    }
+
+    fn visit(&self, bucket_id: &BucketId, addr_ty: &AddressType, loc: &LocationRule, env: &Env) {
+        let loc_result = self.compute_index_from_rule(env, loc);
+        if loc_result == Value::Unknown {
+            self.safe_to_move.replace(false);
+        }
+        //NOTE: must record even when Unknown to ensure that Unknown value is not confused with
+        //  missing values for an iteration that can be caused by conditionals within the loop.
+        if let AddressType::SubcmpSignal {
+            cmp_address,
+            uniform_parallel_value,
+            is_output,
+            input_information,
+            counter_override,
+        } = addr_ty
+        {
+            let addr_result = self.compute_index_from_inst(env, cmp_address);
+            self.record_memloc_at_bucket(
+                bucket_id,
+                AddressType::SubcmpSignal {
+                    cmp_address: {
+                        if addr_result == Value::Unknown {
+                            self.safe_to_move.replace(false);
+                            NopBucket { id: 0 }.allocate()
+                        } else {
+                            addr_result.to_value_bucket(self.mem).allocate()
+                        }
+                    },
+                    uniform_parallel_value: uniform_parallel_value.clone(),
+                    is_output: *is_output,
+                    input_information: input_information.clone(),
+                    counter_override: *counter_override,
+                },
+                loc_result,
+            );
+        } else {
+            self.record_memloc_at_bucket(bucket_id, addr_ty.clone(), loc_result);
+        }
+    }
+}
+
+impl InterpreterObserver for EnvRecorder<'_, '_> {
+    fn on_load_bucket(&self, bucket: &LoadBucket, env: &Env) -> bool {
+        if let Some(_) = bucket.bounded_fn {
+            todo!(); //not sure if/how to handle that
+        }
+        self.visit(&bucket.id, &bucket.address_type, &bucket.src, env);
+        true
+    }
+
+    fn on_store_bucket(&self, bucket: &StoreBucket, env: &Env) -> bool {
+        if let Some(_) = bucket.bounded_fn {
+            todo!(); //not sure if/how to handle that
+        }
+        self.visit(&bucket.id, &bucket.dest_address_type, &bucket.dest, env);
+        true
+    }
+
+    fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_compute_bucket(&self, _bucket: &ComputeBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_assert_bucket(&self, _bucket: &AssertBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_loop_bucket(&self, _bucket: &LoopBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_create_cmp_bucket(&self, _bucket: &CreateCmpBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_constraint_bucket(&self, _bucket: &ConstraintBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_block_bucket(&self, _bucket: &BlockBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_nop_bucket(&self, _bucket: &NopBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_location_rule(&self, _location_rule: &LocationRule, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_call_bucket(&self, _bucket: &CallBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_branch_bucket(&self, _bucket: &BranchBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_return_bucket(&self, _bucket: &ReturnBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn on_log_bucket(&self, _bucket: &LogBucket, _env: &Env) -> bool {
+        self.is_safe_to_move() //continue observing unless something unsafe has been found
+    }
+
+    fn ignore_function_calls(&self) -> bool {
+        true
+    }
+
+    fn ignore_subcmp_calls(&self) -> bool {
+        true
+    }
+}
diff --git a/circuit_passes/src/passes/loop_unroll.rs b/circuit_passes/src/passes/loop_unroll/mod.rs
similarity index 79%
rename from circuit_passes/src/passes/loop_unroll.rs
rename to circuit_passes/src/passes/loop_unroll/mod.rs
index 8aed5c73b..d20b21b59 100644
--- a/circuit_passes/src/passes/loop_unroll.rs
+++ b/circuit_passes/src/passes/loop_unroll/mod.rs
@@ -1,5 +1,10 @@
+mod loop_env_recorder;
+mod extracted_location_updater;
+pub mod body_extractor;
+
 use std::cell::RefCell;
 use std::collections::BTreeMap;
+use std::vec;
 use compiler::circuit_design::template::TemplateCode;
 use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{
@@ -7,61 +12,107 @@ use compiler::intermediate_representation::{
 };
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
-use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
+use crate::passes::loop_unroll::loop_env_recorder::EnvRecorder;
+use super::{CircuitTransformationPass, GlobalPassData};
+use self::body_extractor::LoopBodyExtractor;
+
+const EXTRACT_LOOP_BODY_TO_NEW_FUNC: bool = true; // when false, loop bodies are always unrolled inline
+
+pub fn new_u32_value(bucket: &dyn ObtainMeta, val: usize) -> InstructionPointer {
+    ValueBucket {
+        id: new_id(),
+        source_file_id: bucket.get_source_file_id().clone(), // source metadata comes from `bucket`
+        line: bucket.get_line(),
+        message_id: bucket.get_message_id(),
+        parse_as: ValueType::U32, // `val` is stored tagged as ValueType::U32
+        op_aux_no: 0,
+        value: val,
+    }
+    .allocate()
+}
 
-pub struct LoopUnrollPass {
+pub struct LoopUnrollPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
+    memory: PassMemory,
+    extractor: LoopBodyExtractor,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
     replacements: RefCell<BTreeMap<BucketId, InstructionPointer>>,
 }
 
-impl LoopUnrollPass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> LoopUnrollPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         LoopUnrollPass {
-            memory: PassMemory::new_cell(prime, String::from(""), Default::default()),
+            global_data,
+            memory: PassMemory::new(prime, String::from(""), Default::default()),
             replacements: Default::default(),
+            extractor: Default::default(),
         }
     }
 
     fn try_unroll_loop(&self, bucket: &LoopBucket, env: &Env) -> (Option<InstructionList>, usize) {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        // Compute loop iteration count. If unknown, return immediately.
+        let recorder = EnvRecorder::new(self.global_data, &self.memory);
+        {
+            let interpreter = self.memory.build_interpreter(self.global_data, &recorder);
+            let mut inner_env = env.clone();
+            loop {
+                recorder.record_env_at_header(inner_env.clone());
+                let (_, cond, new_env) =
+                    interpreter.execute_loop_bucket_once(bucket, inner_env, true);
+                match cond {
+                    // If the conditional becomes unknown just give up.
+                    None => return (None, 0),
+                    // When conditional becomes `false`, iteration count is complete.
+                    Some(false) => break,
+                    // Otherwise, continue counting.
+                    Some(true) => recorder.increment_iter(),
+                };
+                inner_env = new_env;
+            }
+        }
+
         let mut block_body = vec![];
-        let mut cond_result = Some(true);
-        let mut env = env.clone();
-        let mut iters = 0;
-        while cond_result.unwrap() {
-            let (_, new_cond, new_env) = interpreter.execute_loop_bucket_once(bucket, env, false);
-            if new_cond.is_none() {
-                return (None, 0); // If the conditional becomes Unknown just give up.
+        if EXTRACT_LOOP_BODY_TO_NEW_FUNC && recorder.is_safe_to_move() {
+            // If the loop body is a single instruction, just duplicate that instruction
+            // 'recorder.get_iter()' number of times. Otherwise, extract the body into a new
+            // function and generate 'recorder.get_iter()' number of calls to that function.
+            match &bucket.body[..] {
+                [a] => {
+                    for _ in 0..recorder.get_iter() {
+                        let mut copy = a.clone();
+                        copy.update_id();
+                        block_body.push(copy);
+                    }
+                }
+                _ => {
+                    self.extractor.extract(bucket, &recorder, &mut block_body);
+                }
             }
-            cond_result = new_cond;
-            env = new_env;
-            if let Some(true) = new_cond {
-                iters += 1;
-                for inst in &bucket.body {
-                    block_body.push(inst.clone());
+        } else {
+            // Extraction is disabled or the loop body is not safe to move: just unroll.
+            for _ in 0..recorder.get_iter() {
+                for s in &bucket.body {
+                    let mut copy = s.clone();
+                    copy.update_id();
+                    block_body.push(copy);
                 }
             }
         }
-        for inst in &mut block_body {
-            inst.update_id();
-        }
-        (Some(block_body), iters)
+        (Some(block_body), recorder.get_iter())
     }
 
-    // Will take the unrolled loop and interpretate it
+    // Will take the unrolled loop and interpret it
     // checking if new loop buckets appear
     fn continue_inside(&self, bucket: &BlockBucket, env: &Env) {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
-        interpreter.execute_block_bucket(bucket, env.clone(), true);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
+        let env = Env::new_unroll_block_env(env.clone(), &self.extractor);
+        interpreter.execute_block_bucket(bucket, env, true);
     }
 }
 
-impl InterpreterObserver for LoopUnrollPass {
+impl InterpreterObserver for LoopUnrollPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -91,6 +142,7 @@ impl InterpreterObserver for LoopUnrollPass {
                 message_id: bucket.message_id,
                 body: block_body,
                 n_iters,
+                label: String::from("unrolled_loop"),
             };
             self.continue_inside(&block, env);
             self.replacements.borrow_mut().insert(bucket.id, block.allocate());
@@ -143,22 +195,38 @@ impl InterpreterObserver for LoopUnrollPass {
     }
 }
 
-impl CircuitTransformationPass for LoopUnrollPass {
+impl CircuitTransformationPass for LoopUnrollPass<'_> {
     fn name(&self) -> &str {
         "LoopUnrollPass"
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
+    }
+
+    fn post_hook_circuit(&self, cir: &mut Circuit) {
+        // Normalize return type on source functions for "WriteLLVMIR for Circuit"
+        //  which treats a 1-D vector of size 1 as a scalar return and an empty
+        //  vector as "void" return type (the initial Circuit builder uses empty
+        //  for scalar returns because it doesn't consider "void" return possible).
+        for f in &mut cir.functions {
+            if f.returns.is_empty() {
+                f.returns = vec![1];
+            }
+        }
+        // Transform and add the new body functions
+        for f in self.extractor.get_new_functions().iter() {
+            cir.functions.push(self.transform_function(&f));
+        }
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self.global_data, self, template);
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn transform_loop_bucket(&self, bucket: &LoopBucket) -> InstructionPointer {
@@ -179,6 +247,7 @@ impl CircuitTransformationPass for LoopUnrollPass {
 
 #[cfg(test)]
 mod test {
+    use std::cell::RefCell;
     use std::collections::HashMap;
     use compiler::circuit_design::template::TemplateCodeInfo;
     use compiler::compiler_interface::Circuit;
@@ -187,13 +256,14 @@ mod test {
         AddressType, Allocate, ComputeBucket, InstrContext, LoadBucket, LocationRule, LoopBucket,
         OperatorType, StoreBucket, ValueBucket, ValueType,
     };
-    use crate::passes::CircuitTransformationPass;
+    use crate::passes::{CircuitTransformationPass, LOOP_BODY_FN_PREFIX, GlobalPassData};
     use crate::passes::loop_unroll::LoopUnrollPass;
 
     #[test]
     fn test_loop_unrolling() {
         let prime = "goldilocks".to_string();
-        let pass = LoopUnrollPass::new(&prime);
+        let global_data = RefCell::new(GlobalPassData::new());
+        let pass = LoopUnrollPass::new(prime, &global_data);
         let mut circuit = example_program();
         circuit.llvm_data.variable_index_mapping.insert("test_0".to_string(), HashMap::new());
         circuit.llvm_data.signal_index_mapping.insert("test_0".to_string(), HashMap::new());
@@ -204,7 +274,15 @@ mod test {
         }
         assert_ne!(circuit, new_circuit);
         match new_circuit.templates[0].body.last().unwrap().as_ref() {
-            Instruction::Block(b) => assert_eq!(b.body.len(), 10), // 5 iterations unrolled times 2 statements in the loop body
+            Instruction::Block(b) => {
+                // 5 iterations unrolled into 5 call statements targeting extracted loop body functions
+                assert_eq!(b.body.len(), 5);
+                assert!(b.body.iter().all(|s| if let Instruction::Call(c) = s.as_ref() {
+                    c.symbol.starts_with(LOOP_BODY_FN_PREFIX)
+                } else {
+                    false
+                }));
+            }
             _ => assert!(false),
         }
     }
diff --git a/circuit_passes/src/passes/mapped_to_indexed.rs b/circuit_passes/src/passes/mapped_to_indexed.rs
index 6e29072d0..513fc9a1a 100644
--- a/circuit_passes/src/passes/mapped_to_indexed.rs
+++ b/circuit_passes/src/passes/mapped_to_indexed.rs
@@ -5,22 +5,24 @@ use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::ir_interface::*;
 use compiler::intermediate_representation::{InstructionPointer, UpdateId};
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::operations::compute_offset;
 use crate::bucket_interpreter::value::Value::KnownU32;
-use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
+use super::{CircuitTransformationPass, GlobalPassData};
 
-pub struct MappedToIndexedPass {
+pub struct MappedToIndexedPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
+    memory: PassMemory,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
     replacements: RefCell<BTreeMap<LocationRule, LocationRule>>,
 }
 
-impl MappedToIndexedPass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> MappedToIndexedPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         MappedToIndexedPass {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            global_data,
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             replacements: Default::default(),
         }
     }
@@ -32,8 +34,7 @@ impl MappedToIndexedPass {
         signal_code: usize,
         env: &Env,
     ) -> LocationRule {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
 
         let (resolved_addr, acc_env) =
             interpreter.execute_instruction(cmp_address, env.clone(), false);
@@ -44,7 +45,8 @@ impl MappedToIndexedPass {
 
         let mut acc_env = acc_env;
         let name = acc_env.get_subcmp_name(resolved_addr).clone();
-        let io_def = &mem.io_map[&acc_env.get_subcmp_template_id(resolved_addr)][signal_code];
+        let io_def =
+            self.memory.get_iodef(&acc_env.get_subcmp_template_id(resolved_addr), &signal_code);
         let map_access = io_def.offset;
         if indexes.len() > 0 {
             let mut indexes_values = vec![];
@@ -54,15 +56,13 @@ impl MappedToIndexedPass {
                 acc_env = new_env;
             }
             let offset = compute_offset(&indexes_values, &io_def.lengths);
-            let mut unused = vec![];
             LocationRule::Indexed {
-                location: KnownU32(map_access + offset).to_value_bucket(&mut unused).allocate(),
+                location: KnownU32(map_access + offset).to_value_bucket(&self.memory).allocate(),
                 template_header: Some(name),
             }
         } else {
-            let mut unused = vec![];
             LocationRule::Indexed {
-                location: KnownU32(map_access).to_value_bucket(&mut unused).allocate(),
+                location: KnownU32(map_access).to_value_bucket(&self.memory).allocate(),
                 template_header: Some(name),
             }
         }
@@ -96,7 +96,7 @@ impl MappedToIndexedPass {
     }
 }
 
-impl InterpreterObserver for MappedToIndexedPass {
+impl InterpreterObserver for MappedToIndexedPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -166,13 +166,13 @@ impl InterpreterObserver for MappedToIndexedPass {
     }
 }
 
-impl CircuitTransformationPass for MappedToIndexedPass {
+impl CircuitTransformationPass for MappedToIndexedPass<'_> {
     fn name(&self) -> &str {
         "MappedToIndexedPass"
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     /*
@@ -196,11 +196,11 @@ impl CircuitTransformationPass for MappedToIndexedPass {
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self.global_data, self, template);
     }
 }
diff --git a/circuit_passes/src/passes/memory.rs b/circuit_passes/src/passes/memory.rs
deleted file mode 100644
index 55b8631b3..000000000
--- a/circuit_passes/src/passes/memory.rs
+++ /dev/null
@@ -1,113 +0,0 @@
-use std::cell::RefCell;
-use std::collections::HashMap;
-use code_producers::components::TemplateInstanceIOMap;
-use code_producers::llvm_elements::IndexMapping;
-use compiler::circuit_design::function::FunctionCode;
-use compiler::circuit_design::template::TemplateCode;
-use compiler::compiler_interface::Circuit;
-use crate::bucket_interpreter::BucketInterpreter;
-use crate::bucket_interpreter::env::{ContextSwitcher, FunctionsLibrary, TemplatesLibrary};
-use crate::bucket_interpreter::env::Env;
-use crate::bucket_interpreter::observer::InterpreterObserver;
-
-pub struct PassMemory {
-    pub templates_library: TemplatesLibrary,
-    pub functions_library: FunctionsLibrary,
-    pub prime: String,
-    pub constant_fields: Vec<String>,
-    pub current_scope: String,
-    pub io_map: TemplateInstanceIOMap,
-    pub signal_index_mapping: HashMap<String, IndexMapping>,
-    pub variables_index_mapping: HashMap<String, IndexMapping>,
-    pub component_addr_index_mapping: HashMap<String, IndexMapping>,
-}
-
-impl PassMemory {
-    pub fn new_cell(
-        prime: &String,
-        current_scope: String,
-        io_map: TemplateInstanceIOMap,
-    ) -> RefCell<Self> {
-        RefCell::new(PassMemory {
-            templates_library: Default::default(),
-            functions_library: Default::default(),
-            prime: prime.to_string(),
-            constant_fields: vec![],
-            current_scope,
-            io_map,
-            signal_index_mapping: Default::default(),
-            variables_index_mapping: Default::default(),
-            component_addr_index_mapping: Default::default(),
-        })
-    }
-
-    pub fn set_scope(&mut self, template: &TemplateCode) {
-        self.current_scope = template.header.clone();
-    }
-
-    pub fn run_template(&self, observer: &dyn InterpreterObserver, template: &TemplateCode) {
-        assert!(!self.current_scope.is_empty());
-        if cfg!(debug_assertions) {
-            println!("Running template {}", self.current_scope);
-        }
-        let interpreter = self.build_interpreter(observer);
-        let env = Env::new(&self.templates_library, &self.functions_library, self);
-        interpreter.execute_instructions(&template.body, env, true);
-    }
-
-    pub fn build_interpreter<'a>(
-        &'a self,
-        observer: &'a dyn InterpreterObserver,
-    ) -> BucketInterpreter {
-        self.build_interpreter_with_scope(observer, &self.current_scope)
-    }
-
-    fn build_interpreter_with_scope<'a>(
-        &'a self,
-        observer: &'a dyn InterpreterObserver,
-        scope: &'a String,
-    ) -> BucketInterpreter {
-        BucketInterpreter::init(
-            scope,
-            &self.prime,
-            &self.constant_fields,
-            observer,
-            &self.io_map,
-            &self.signal_index_mapping[scope],
-            &self.variables_index_mapping[scope],
-            &self.component_addr_index_mapping[scope],
-        )
-    }
-
-    pub fn add_template(&mut self, template: &TemplateCode) {
-        self.templates_library.insert(template.header.clone(), (*template).clone());
-    }
-
-    pub fn add_function(&mut self, function: &FunctionCode) {
-        self.functions_library.insert(function.header.clone(), (*function).clone());
-    }
-
-    pub fn fill_from_circuit(&mut self, circuit: &Circuit) {
-        for template in &circuit.templates {
-            self.add_template(template);
-        }
-        for function in &circuit.functions {
-            self.add_function(function);
-        }
-        self.constant_fields = circuit.llvm_data.field_tracking.clone();
-        self.io_map = circuit.llvm_data.io_map.clone();
-        self.variables_index_mapping = circuit.llvm_data.variable_index_mapping.clone();
-        self.signal_index_mapping = circuit.llvm_data.signal_index_mapping.clone();
-        self.component_addr_index_mapping = circuit.llvm_data.component_index_mapping.clone();
-    }
-}
-
-impl ContextSwitcher for PassMemory {
-    fn switch<'a>(
-        &'a self,
-        interpreter: &'a BucketInterpreter<'a>,
-        scope: &'a String,
-    ) -> BucketInterpreter<'a> {
-        self.build_interpreter_with_scope(interpreter.observer, scope)
-    }
-}
diff --git a/circuit_passes/src/passes/mod.rs b/circuit_passes/src/passes/mod.rs
index 7f8db765d..a32498791 100644
--- a/circuit_passes/src/passes/mod.rs
+++ b/circuit_passes/src/passes/mod.rs
@@ -1,25 +1,29 @@
 use std::cell::RefCell;
+use std::collections::{HashMap, BTreeMap};
 use compiler::circuit_design::function::{FunctionCode, FunctionCodeInfo};
 use compiler::circuit_design::template::{TemplateCode, TemplateCodeInfo};
 use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{Instruction, InstructionList, InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
+use code_producers::llvm_elements::stdlib::GENERATED_FN_PREFIX;
 use crate::passes::{
-    conditional_flattening::ConditionalFlattening,
+    checks::assert_unique_ids_in_circuit, conditional_flattening::ConditionalFlatteningPass,
     deterministic_subcomponent_invocation::DeterministicSubCmpInvokePass,
     loop_unroll::LoopUnrollPass, mapped_to_indexed::MappedToIndexedPass,
     simplification::SimplificationPass, unknown_index_sanitization::UnknownIndexSanitizationPass,
 };
-use crate::passes::checks::assert_unique_ids_in_circuit;
+
+use self::loop_unroll::body_extractor::{UnrolledIterLvars, ToOriginalLocation};
 
 mod conditional_flattening;
-mod loop_unroll;
-mod memory;
 mod simplification;
 mod deterministic_subcomponent_invocation;
 mod mapped_to_indexed;
 mod unknown_index_sanitization;
 mod checks;
+pub mod loop_unroll;
+
+pub const LOOP_BODY_FN_PREFIX: &str = const_format::concatcp!(GENERATED_FN_PREFIX, "loop.body.");
 
 macro_rules! pre_hook {
     ($name: ident, $bucket_ty: ty) => {
@@ -34,13 +38,15 @@ pub trait CircuitTransformationPass {
         self.pre_hook_circuit(&circuit);
         let templates = circuit.templates.iter().map(|t| self.transform_template(t)).collect();
         let field_tracking = self.get_updated_field_constants();
-        Circuit {
+        let mut new_circuit = Circuit {
             wasm_producer: circuit.wasm_producer.clone(),
             c_producer: circuit.c_producer.clone(),
             llvm_data: circuit.llvm_data.clone_with_new_field_tracking(field_tracking),
             templates,
             functions: circuit.functions.iter().map(|f| self.transform_function(f)).collect(),
-        }
+        };
+        self.post_hook_circuit(&mut new_circuit);
+        new_circuit
     }
 
     fn get_updated_field_constants(&self) -> Vec<String>;
@@ -150,11 +156,13 @@ pub trait CircuitTransformationPass {
                 uniform_parallel_value,
                 is_output,
                 input_information,
+                counter_override,
             } => AddressType::SubcmpSignal {
                 cmp_address: self.transform_instruction(cmp_address),
                 uniform_parallel_value: uniform_parallel_value.clone(),
                 is_output: *is_output,
                 input_information: input_information.clone(),
+                counter_override: *counter_override,
             },
             x => x.clone(),
         }
@@ -360,6 +368,7 @@ pub trait CircuitTransformationPass {
             message_id: bucket.message_id,
             body: self.transform_instructions(&bucket.body),
             n_iters: bucket.n_iters,
+            label: bucket.label.clone(),
         }
         .allocate()
     }
@@ -368,6 +377,8 @@ pub trait CircuitTransformationPass {
         NopBucket { id: new_id() }.allocate()
     }
 
+    fn post_hook_circuit(&self, _cir: &mut Circuit) {}
+
     pre_hook!(pre_hook_circuit, Circuit);
     pre_hook!(pre_hook_template, TemplateCode);
     pre_hook!(pre_hook_function, FunctionCode);
@@ -388,10 +399,31 @@ pub trait CircuitTransformationPass {
     pre_hook!(pre_hook_nop_bucket, NopBucket);
 }
 
-pub type Passes = RefCell<Vec<Box<dyn CircuitTransformationPass>>>;
+pub enum PassKind {
+    LoopUnroll,
+    Simplification,
+    ConditionalFlattening,
+    DeterministicSubCmpInvoke,
+    MappedToIndexed,
+    UnknownIndexSanitization,
+}
+
+pub struct GlobalPassData {
+    /// Created during loop unrolling, maps generated function name + UnrolledIterLvars
+    /// (from Env::get_vars_sort) to location reference in the original function. Used
+    /// by ExtractedFuncEnvData to access the original function's Env via the extracted
+    /// function's parameter references.
+    pub extract_func_orig_loc: HashMap<String, BTreeMap<UnrolledIterLvars, ToOriginalLocation>>,
+}
+
+impl GlobalPassData {
+    pub fn new() -> GlobalPassData {
+        GlobalPassData { extract_func_orig_loc: Default::default() }
+    }
+}
 
 pub struct PassManager {
-    passes: Passes,
+    passes: RefCell<Vec<PassKind>>,
 }
 
 impl PassManager {
@@ -399,39 +431,68 @@ impl PassManager {
         PassManager { passes: Default::default() }
     }
 
-    pub fn schedule_loop_unroll_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(LoopUnrollPass::new(prime)));
+    pub fn schedule_loop_unroll_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::LoopUnroll);
         self
     }
 
-    pub fn schedule_simplification_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(SimplificationPass::new(prime)));
+    pub fn schedule_simplification_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::Simplification);
         self
     }
 
-    pub fn schedule_conditional_flattening_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(ConditionalFlattening::new(prime)));
+    pub fn schedule_conditional_flattening_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::ConditionalFlattening);
         self
     }
 
-    pub fn schedule_deterministic_subcmp_invoke_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(DeterministicSubCmpInvokePass::new(prime)));
+    pub fn schedule_deterministic_subcmp_invoke_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::DeterministicSubCmpInvoke);
         self
     }
 
-    pub fn schedule_mapped_to_indexed_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(MappedToIndexedPass::new(prime)));
+    pub fn schedule_mapped_to_indexed_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::MappedToIndexed);
         self
     }
 
-    pub fn schedule_unknown_index_sanitization_pass(&self, prime: &String) -> &Self {
-        self.passes.borrow_mut().push(Box::new(UnknownIndexSanitizationPass::new(prime)));
+    pub fn schedule_unknown_index_sanitization_pass(&self) -> &Self {
+        self.passes.borrow_mut().push(PassKind::UnknownIndexSanitization);
         self
     }
 
-    pub fn transform_circuit(&self, circuit: Circuit) -> Circuit {
+    fn build_pass<'d>(
+        kind: PassKind,
+        prime: &str, // forwarded (as an owned String) to the selected pass constructor
+        global_data: &'d RefCell<GlobalPassData>,
+    ) -> Box<dyn CircuitTransformationPass + 'd> {
+        match kind {
+            PassKind::LoopUnroll => Box::new(LoopUnrollPass::new(prime.to_owned(), global_data)),
+            PassKind::Simplification => {
+                Box::new(SimplificationPass::new(prime.to_owned(), global_data))
+            }
+            PassKind::ConditionalFlattening => {
+                Box::new(ConditionalFlatteningPass::new(prime.to_owned(), global_data))
+            }
+            PassKind::DeterministicSubCmpInvoke => {
+                Box::new(DeterministicSubCmpInvokePass::new(prime.to_owned(), global_data))
+            }
+            PassKind::MappedToIndexed => {
+                Box::new(MappedToIndexedPass::new(prime.to_owned(), global_data))
+            }
+            PassKind::UnknownIndexSanitization => {
+                Box::new(UnknownIndexSanitizationPass::new(prime.to_owned(), global_data))
+            }
+        }
+    }
+
+    pub fn transform_circuit(&self, circuit: Circuit, prime: &String) -> Circuit {
+        // NOTE: Used RefCell rather than a mutable reference because storing
+        //  the mutable reference in EnvRecorder was causing rustc errors.
+        let global_data = RefCell::new(GlobalPassData::new());
         let mut transformed_circuit = circuit;
-        for pass in self.passes.borrow().iter() {
+        for kind in self.passes.borrow_mut().drain(..) {
+            let pass = Self::build_pass(kind, prime, &global_data);
             if cfg!(debug_assertions) {
                 println!("Do {}...", pass.name());
             }
diff --git a/circuit_passes/src/passes/simplification.rs b/circuit_passes/src/passes/simplification.rs
index 4868231da..81275e883 100644
--- a/circuit_passes/src/passes/simplification.rs
+++ b/circuit_passes/src/passes/simplification.rs
@@ -5,29 +5,32 @@ use compiler::compiler_interface::Circuit;
 use compiler::intermediate_representation::{InstructionPointer, new_id};
 use compiler::intermediate_representation::ir_interface::*;
 use crate::bucket_interpreter::env::Env;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::value::Value;
-use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
+use super::{CircuitTransformationPass, GlobalPassData};
 
-pub struct SimplificationPass {
+pub struct SimplificationPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
+    memory: PassMemory,
     // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
-    memory: RefCell<PassMemory>,
     compute_replacements: RefCell<BTreeMap<ComputeBucket, Value>>,
     call_replacements: RefCell<BTreeMap<CallBucket, Value>>,
+    //TODO: could use BucketId instead of cloning buckets for keys
 }
 
-impl SimplificationPass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> SimplificationPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         SimplificationPass {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            global_data,
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             compute_replacements: Default::default(),
             call_replacements: Default::default(),
         }
     }
 }
 
-impl InterpreterObserver for SimplificationPass {
+impl InterpreterObserver for SimplificationPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -42,8 +45,7 @@ impl InterpreterObserver for SimplificationPass {
 
     fn on_compute_bucket(&self, bucket: &ComputeBucket, env: &Env) -> bool {
         let env = env.clone();
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
         let (eval, _) = interpreter.execute_compute_bucket(bucket, env, false);
         let eval = eval.expect("Compute bucket must produce a value!");
         if !eval.is_unknown() {
@@ -83,8 +85,7 @@ impl InterpreterObserver for SimplificationPass {
 
     fn on_call_bucket(&self, bucket: &CallBucket, env: &Env) -> bool {
         let env = env.clone();
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let interpreter = self.memory.build_interpreter(self.global_data, self);
         let (eval, _) = interpreter.execute_call_bucket(bucket, env, false);
         if let Some(eval) = eval {
             // Call buckets may not return a value directly
@@ -117,19 +118,18 @@ impl InterpreterObserver for SimplificationPass {
     }
 }
 
-impl CircuitTransformationPass for SimplificationPass {
+impl CircuitTransformationPass for SimplificationPass<'_> {
     fn name(&self) -> &str {
         "SimplificationPass"
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn transform_compute_bucket(&self, bucket: &ComputeBucket) -> InstructionPointer {
         if let Some(value) = self.compute_replacements.borrow().get(&bucket) {
-            let constant_fields = &mut self.memory.borrow_mut().constant_fields;
-            return value.to_value_bucket(constant_fields).allocate();
+            return value.to_value_bucket(&self.memory).allocate();
         }
         ComputeBucket {
             id: new_id(),
@@ -145,8 +145,7 @@ impl CircuitTransformationPass for SimplificationPass {
 
     fn transform_call_bucket(&self, bucket: &CallBucket) -> InstructionPointer {
         if let Some(value) = self.call_replacements.borrow().get(&bucket) {
-            let constant_fields = &mut self.memory.borrow_mut().constant_fields;
-            return value.to_value_bucket(constant_fields).allocate();
+            return value.to_value_bucket(&self.memory).allocate();
         }
         CallBucket {
             id: new_id(),
@@ -163,11 +162,11 @@ impl CircuitTransformationPass for SimplificationPass {
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self.global_data, self, template);
     }
 }
diff --git a/circuit_passes/src/passes/unknown_index_sanitization.rs b/circuit_passes/src/passes/unknown_index_sanitization.rs
index 68d92da9d..c4f3ac18e 100644
--- a/circuit_passes/src/passes/unknown_index_sanitization.rs
+++ b/circuit_passes/src/passes/unknown_index_sanitization.rs
@@ -9,12 +9,12 @@ use compiler::num_bigint::BigInt;
 use code_producers::llvm_elements::array_switch::{get_array_load_symbol, get_array_store_symbol};
 use program_structure::constants::UsefulConstants;
 use crate::bucket_interpreter::env::Env;
-use crate::bucket_interpreter::R;
+use crate::bucket_interpreter::memory::PassMemory;
 use crate::bucket_interpreter::observer::InterpreterObserver;
 use crate::bucket_interpreter::operations::compute_operation;
+use crate::bucket_interpreter::R;
 use crate::bucket_interpreter::value::Value::{KnownU32, KnownBigInt};
-use crate::passes::CircuitTransformationPass;
-use crate::passes::memory::PassMemory;
+use super::{CircuitTransformationPass, GlobalPassData};
 
 struct ZeroingInterpreter<'a> {
     pub constant_fields: &'a Vec<String>,
@@ -81,9 +81,10 @@ impl<'a> ZeroingInterpreter<'a> {
     }
 }
 
-pub struct UnknownIndexSanitizationPass {
+pub struct UnknownIndexSanitizationPass<'d> {
+    global_data: &'d RefCell<GlobalPassData>,
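-    // Wrapped in a RefCell because the reference to the static analysis is immutable but we need mutability
+    // Not wrapped in a RefCell: PassMemory is updated through `&self` and is expected to handle interior mutability itself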
-    memory: RefCell<PassMemory>,
+    memory: PassMemory,
     load_replacements: RefCell<BTreeMap<LoadBucket, Range<usize>>>,
     store_replacements: RefCell<BTreeMap<StoreBucket, Range<usize>>>,
 }
@@ -91,10 +92,11 @@ pub struct UnknownIndexSanitizationPass {
 /**
  * The goal of this pass is to
  */
-impl UnknownIndexSanitizationPass {
-    pub fn new(prime: &String) -> Self {
+impl<'d> UnknownIndexSanitizationPass<'d> {
+    pub fn new(prime: String, global_data: &'d RefCell<GlobalPassData>) -> Self {
         UnknownIndexSanitizationPass {
-            memory: PassMemory::new_cell(prime, "".to_string(), Default::default()),
+            global_data,
+            memory: PassMemory::new(prime, "".to_string(), Default::default()),
             load_replacements: Default::default(),
             store_replacements: Default::default(),
         }
@@ -106,16 +108,6 @@ impl UnknownIndexSanitizationPass {
         location: &LocationRule,
         env: &Env,
     ) -> Range<usize> {
-        let mem = self.memory.borrow();
-        let interpreter = ZeroingInterpreter::init(&mem.prime, &mem.constant_fields);
-        let current_scope = &mem.current_scope;
-
-        let mapping = match address {
-            AddressType::Variable => &mem.variables_index_mapping[current_scope],
-            AddressType::Signal => &mem.signal_index_mapping[current_scope],
-            AddressType::SubcmpSignal { .. } => &mem.component_addr_index_mapping[current_scope],
-        };
-
         /*
          * We assume locations are of the form:
          *      (base_offset + (mul_offset * UNKNOWN))
@@ -126,17 +118,25 @@ impl UnknownIndexSanitizationPass {
          * a similar pattern that is also handled here.
          */
         match location {
+            LocationRule::Mapped { .. } => unreachable!(),
             LocationRule::Indexed { location, .. } => {
+                let mem = &self.memory;
+                let constant_fields = mem.get_field_constants_clone();
+                let interpreter = ZeroingInterpreter::init(mem.get_prime(), &constant_fields);
                 let (res, _) = interpreter.execute_instruction(location, env.clone());
 
                 let offset = match res {
                     Some(KnownU32(base)) => base,
                     _ => unreachable!(),
                 };
-
-                mapping[&offset].clone()
+                match address {
+                    AddressType::Variable => mem.get_current_scope_variables_index_mapping(&offset),
+                    AddressType::Signal => mem.get_current_scope_signal_index_mapping(&offset),
+                    AddressType::SubcmpSignal { .. } => {
+                        mem.get_current_scope_component_addr_index_mapping(&offset)
+                    }
+                }
             }
-            LocationRule::Mapped { .. } => unreachable!(),
         }
     }
 
@@ -146,8 +146,8 @@ impl UnknownIndexSanitizationPass {
         location: &LocationRule,
         env: &Env,
     ) -> bool {
-        let mem = self.memory.borrow();
-        let interpreter = mem.build_interpreter(self);
+        let mem = &self.memory;
+        let interpreter = mem.build_interpreter(self.global_data, self);
 
         let resolved_addr = match location {
             LocationRule::Indexed { location, .. } => {
@@ -166,7 +166,7 @@ impl UnknownIndexSanitizationPass {
  * - loads with a function call that returns the loaded value
  * - stores with a function call that performs the store
  */
-impl InterpreterObserver for UnknownIndexSanitizationPass {
+impl InterpreterObserver for UnknownIndexSanitizationPass<'_> {
     fn on_value_bucket(&self, _bucket: &ValueBucket, _env: &Env) -> bool {
         true
     }
@@ -248,7 +248,7 @@ impl InterpreterObserver for UnknownIndexSanitizationPass {
     }
 }
 
-impl CircuitTransformationPass for UnknownIndexSanitizationPass {
+impl CircuitTransformationPass for UnknownIndexSanitizationPass<'_> {
     fn name(&self) -> &str {
         "UnknownIndexSanitizationPass"
     }
@@ -291,15 +291,15 @@ impl CircuitTransformationPass for UnknownIndexSanitizationPass {
     }
 
     fn get_updated_field_constants(&self) -> Vec<String> {
-        self.memory.borrow().constant_fields.clone()
+        self.memory.get_field_constants_clone()
     }
 
     fn pre_hook_circuit(&self, circuit: &Circuit) {
-        self.memory.borrow_mut().fill_from_circuit(circuit);
+        self.memory.fill_from_circuit(circuit);
     }
 
     fn pre_hook_template(&self, template: &TemplateCode) {
-        self.memory.borrow_mut().set_scope(template);
-        self.memory.borrow().run_template(self, template);
+        self.memory.set_scope(template);
+        self.memory.run_template(self.global_data, self, template);
     }
 }
diff --git a/code_producers/src/llvm_elements/fr.rs b/code_producers/src/llvm_elements/fr.rs
index cc994aa08..581ea1180 100644
--- a/code_producers/src/llvm_elements/fr.rs
+++ b/code_producers/src/llvm_elements/fr.rs
@@ -1,16 +1,19 @@
+use inkwell::attributes::{Attribute, AttributeLoc};
+use inkwell::values::FunctionValue;
+
 use crate::llvm_elements::LLVMIRProducer;
-use crate::llvm_elements::functions::create_bb;
-use crate::llvm_elements::functions::create_function;
+use crate::llvm_elements::functions::{create_bb, create_function};
 use crate::llvm_elements::instructions::{
     create_add, create_sub, create_mul, create_div, create_mod, create_pow, create_eq, create_neq,
-    create_lt, create_gt, create_le, create_ge, create_neg, create_shl, create_shr, create_bit_and,
-    create_bit_or, create_bit_xor, create_logic_and, create_logic_or, create_logic_not,
-    create_return, create_cast_to_addr,
+    create_lt, create_gt, create_le, create_ge, create_gep, create_neg, create_shl, create_shr,
+    create_bit_and, create_bit_or, create_bit_xor, create_logic_and, create_logic_or,
+    create_logic_not, create_return, create_cast_to_addr,
 };
 use crate::llvm_elements::types::{bigint_type, bool_type, i32_type, void_type};
 
 use super::instructions::create_array_copy;
-use super::instructions::{create_inv, create_return_void};
+use super::instructions::{create_inv, create_return_void, pointer_cast};
+use super::values::zero;
 
 pub const FR_ADD_FN_NAME: &str = "fr_add";
 pub const FR_SUB_FN_NAME: &str = "fr_sub";
@@ -37,6 +40,21 @@ pub const FR_LOR_FN_NAME: &str = "fr_logic_or";
 pub const FR_LNOT_FN_NAME: &str = "fr_logic_not";
 pub const FR_ADDR_CAST_FN_NAME: &str = "fr_cast_to_addr";
 pub const FR_ARRAY_COPY_FN_NAME: &str = "fr_copy_n";
+pub const FR_INDEX_ARR_PTR: &str = "index_arr_ptr";
+pub const FR_IDENTITY_ARR_PTR: &str = "identity_arr_ptr";
+pub const FR_PTR_CAST_I32_I256: &str = "cast_ptr_i32_i256";
+pub const FR_PTR_CAST_I256_I32: &str = "cast_ptr_i256_i32";
+pub const FR_NULL_I256_ARR_PTR: &str = "null_i256_arr_ptr";
+pub const FR_NULL_I256_PTR: &str = "null_i256_ptr";
+
+macro_rules! fr_nullary_op {
+    ($name: expr, $producer: expr, $retTy: expr) => {{
+        let func = create_function($producer, &None, 0, "", $name, $retTy.fn_type(&[], false));
+        let main = create_bb($producer, func, $name);
+        $producer.set_current_bb(main);
+        func
+    }};
+}
 
 macro_rules! fr_unary_op_base {
     ($name: expr, $producer: expr, $argTy: expr, $retTy: expr) => {{
@@ -46,13 +64,13 @@ macro_rules! fr_unary_op_base {
         $producer.set_current_bb(main);
 
         let lhs = func.get_nth_param(0).unwrap();
-        lhs
+        (lhs, func)
     }};
 }
 
 macro_rules! fr_unary_op {
     ($name: expr, $producer: expr, $valTy: expr) => {{
-        fr_unary_op_base!($name, $producer, $valTy, $valTy)
+        fr_unary_op_base!($name, $producer, $valTy, $valTy).0
     }};
 }
 
@@ -89,26 +107,35 @@ macro_rules! fr_binary_op_bigint_to_bool {
     }};
 }
 
-pub fn add_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn add_inline_attribute<'a>(producer: &dyn LLVMIRProducer<'a>, func: FunctionValue) {
+    func.add_attribute(
+        AttributeLoc::Function,
+        producer
+            .context()
+            .create_enum_attribute(Attribute::get_named_enum_kind_id("alwaysinline"), 1),
+    );
+}
+
+fn add_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_ADD_FN_NAME, producer);
     let add = create_add(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, add.into_int_value());
 }
 
-pub fn sub_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn sub_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_SUB_FN_NAME, producer);
     let add = create_sub(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, add.into_int_value());
 }
 
-pub fn mul_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn mul_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_MUL_FN_NAME, producer);
     let add = create_mul(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, add.into_int_value());
 }
 
 // Multiplication by the inverse
-pub fn div_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn div_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_DIV_FN_NAME, producer);
     let inv = create_inv(producer, rhs.into_int_value());
     let res = create_mul(producer, lhs.into_int_value(), inv.into_int_value());
@@ -116,20 +143,20 @@ pub fn div_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
 }
 
 // Quotient of the integer division
-pub fn intdiv_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn intdiv_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_INTDIV_FN_NAME, producer);
     let res = create_div(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
 // Remainder of the integer division
-pub fn mod_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn mod_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_MOD_FN_NAME, producer);
     let div = create_mod(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, div.into_int_value());
 }
 
-pub fn pow_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn pow_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_POW_FN_NAME, producer);
     let f = producer
         .llvm()
@@ -140,79 +167,79 @@ pub fn pow_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return(producer, res.into_int_value());
 }
 
-pub fn eq_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn eq_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_EQ_FN_NAME, producer);
     let eq = create_eq(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, eq.into_int_value());
 }
 
-pub fn neq_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn neq_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_NEQ_FN_NAME, producer);
     let neq = create_neq(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, neq.into_int_value());
 }
 
-pub fn lt_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn lt_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_LT_FN_NAME, producer);
     let res = create_lt(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn gt_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn gt_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_GT_FN_NAME, producer);
     let res = create_gt(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn le_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn le_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_LE_FN_NAME, producer);
     let res = create_le(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn ge_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn ge_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint_to_bool!(FR_GE_FN_NAME, producer);
     let res = create_ge(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn neg_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn neg_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let arg = fr_unary_op!(FR_NEG_FN_NAME, producer, bigint_type(producer));
     let neg = create_neg(producer, arg.into_int_value());
     create_return(producer, neg.into_int_value());
 }
 
-pub fn shl_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn shl_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_SHL_FN_NAME, producer);
     let res = create_shl(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn shr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn shr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_SHR_FN_NAME, producer);
     let res = create_shr(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn bit_and_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn bit_and_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_BITAND_FN_NAME, producer);
     let res = create_bit_and(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn bit_or_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn bit_or_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_BITOR_FN_NAME, producer);
     let res = create_bit_or(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn bit_xor_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn bit_xor_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bigint!(FR_BITXOR_FN_NAME, producer);
     let res = create_bit_xor(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn bit_flip_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn bit_flip_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let ty = bigint_type(producer);
     let arg = fr_unary_op!(FR_BITFLIP_FN_NAME, producer, ty);
     // ~x <=> xor(x, 0xFF...)
@@ -220,26 +247,26 @@ pub fn bit_flip_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return(producer, res.into_int_value());
 }
 
-pub fn logic_and_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn logic_and_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bool!(FR_LAND_FN_NAME, producer);
     let res = create_logic_and(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn logic_or_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn logic_or_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let (lhs, rhs) = fr_binary_op_bool!(FR_LOR_FN_NAME, producer);
     let res = create_logic_or(producer, lhs.into_int_value(), rhs.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn logic_not_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn logic_not_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let arg = fr_unary_op!(FR_LNOT_FN_NAME, producer, bool_type(producer));
     let res = create_logic_not(producer, arg.into_int_value());
     create_return(producer, res.into_int_value());
 }
 
-pub fn addr_cast_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
-    let arg = fr_unary_op_base!(
+fn addr_cast_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let (arg, _) = fr_unary_op_base!(
         FR_ADDR_CAST_FN_NAME,
         producer,
         bigint_type(producer),
@@ -249,7 +276,7 @@ pub fn addr_cast_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     create_return(producer, res.into_int_value());
 }
 
-pub fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let ptr_ty = bigint_type(producer).ptr_type(Default::default());
     let args = &[ptr_ty.into(), ptr_ty.into(), i32_type(producer).into()];
     let func = create_function(
@@ -266,11 +293,85 @@ pub fn array_copy_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
     let src = func.get_nth_param(0).unwrap();
     let dst = func.get_nth_param(1).unwrap();
     let len = func.get_nth_param(2).unwrap();
-    create_array_copy(producer, func, src.into_pointer_value(), dst.into_pointer_value(), len.into_int_value());
+    create_array_copy(
+        producer,
+        func,
+        src.into_pointer_value(),
+        dst.into_pointer_value(),
+        len.into_int_value(),
+    );
 
     create_return_void(producer);
 }
 
+fn index_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let bigint_ty = bigint_type(producer);
+    let ret_ty = bigint_ty.ptr_type(Default::default());
+    let val_ty = bigint_ty.array_type(0).ptr_type(Default::default());
+    let func = create_function(
+        producer,
+        &None,
+        0,
+        "",
+        FR_INDEX_ARR_PTR,
+        ret_ty.fn_type(&[val_ty.into(), bigint_ty.into()], false),
+    );
+    add_inline_attribute(producer, func);
+
+    let arr = func.get_nth_param(0).unwrap();
+    let idx = func.get_nth_param(1).unwrap();
+    arr.set_name("arr");
+    idx.set_name("idx");
+
+    let main = create_bb(producer, func, FR_INDEX_ARR_PTR);
+    producer.set_current_bb(main);
+    let gep =
+        create_gep(producer, arr.into_pointer_value(), &[zero(producer), idx.into_int_value()]);
+    create_return(producer, gep.into_pointer_value());
+}
+
+fn identity_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let ty = bigint_type(producer).array_type(0).ptr_type(Default::default());
+    let (res, func) = fr_unary_op_base!(FR_IDENTITY_ARR_PTR, producer, ty, ty);
+    add_inline_attribute(producer, func);
+    // Just return the parameter
+    create_return(producer, res);
+}
+
+fn ptr_cast_i32_i256_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let ty_32 = i32_type(producer).ptr_type(Default::default());
+    let ty_256 = bigint_type(producer).ptr_type(Default::default());
+    let (res, func) = fr_unary_op_base!(FR_PTR_CAST_I32_I256, producer, ty_32, ty_256);
+    add_inline_attribute(producer, func);
+    // Cast the i32* to i256* and return
+    create_return(producer, pointer_cast(producer, res.into_pointer_value(), ty_256));
+}
+
+fn ptr_cast_i256_i32_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let ty_32 = i32_type(producer).ptr_type(Default::default());
+    let ty_256 = bigint_type(producer).ptr_type(Default::default());
+    let (res, func) = fr_unary_op_base!(FR_PTR_CAST_I256_I32, producer, ty_256, ty_32);
+    add_inline_attribute(producer, func);
+    // Cast the i256* to i32* and return
+    create_return(producer, pointer_cast(producer, res.into_pointer_value(), ty_32));
+}
+
+fn null_i256_arr_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let base_ty = bigint_type(producer).array_type(0).ptr_type(Default::default());
+    let func = fr_nullary_op!(FR_NULL_I256_ARR_PTR, producer, base_ty);
+    add_inline_attribute(producer, func);
+    // Just return null value for the proper pointer type
+    create_return(producer, base_ty.const_null());
+}
+
+fn null_i256_ptr_fn<'a>(producer: &dyn LLVMIRProducer<'a>) {
+    let base_ty = bigint_type(producer).ptr_type(Default::default());
+    let func = fr_nullary_op!(FR_NULL_I256_PTR, producer, base_ty);
+    add_inline_attribute(producer, func);
+    // Just return null value for the proper pointer type
+    create_return(producer, base_ty.const_null());
+}
+
 pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     add_fn(producer);
     sub_fn(producer);
@@ -296,5 +397,11 @@ pub fn load_fr<'a>(producer: &dyn LLVMIRProducer<'a>) {
     logic_not_fn(producer);
     addr_cast_fn(producer);
     array_copy_fn(producer);
+    index_arr_ptr_fn(producer);
+    identity_arr_ptr_fn(producer);
+    ptr_cast_i32_i256_fn(producer);
+    ptr_cast_i256_i32_fn(producer);
+    null_i256_arr_ptr_fn(producer);
+    null_i256_ptr_fn(producer);
     pow_fn(producer); //uses functions generated by mul_fn & lt_fn
 }
diff --git a/code_producers/src/llvm_elements/functions.rs b/code_producers/src/llvm_elements/functions.rs
index 67ebbb2ce..2d0c1cec7 100644
--- a/code_producers/src/llvm_elements/functions.rs
+++ b/code_producers/src/llvm_elements/functions.rs
@@ -5,9 +5,9 @@ use inkwell::debug_info::AsDIScope;
 use inkwell::types::FunctionType;
 use inkwell::values::{AnyValueEnum, ArrayValue, FunctionValue, IntValue, PointerValue};
 
-use crate::llvm_elements::{BodyCtx, LLVM, LLVMIRProducer};
+use crate::llvm_elements::{BodyCtx, LLVM, LLVMIRProducer, TemplateCtx};
 use crate::llvm_elements::instructions::create_gep;
-use crate::llvm_elements::template::TemplateCtx;
+use crate::llvm_elements::values::zero;
 
 pub fn create_function<'a>(
     producer: &dyn LLVMIRProducer<'a>,
@@ -50,10 +50,40 @@ pub fn create_bb<'a>(
     producer.context().append_basic_block(func, name)
 }
 
+struct FunctionCtx<'a> {
+    current_function: FunctionValue<'a>,
+    arena: PointerValue<'a>,
+}
+
+impl<'a> FunctionCtx<'a> {
+    fn new(current_function: FunctionValue<'a>) -> Self {
+        FunctionCtx {
+            current_function,
+            arena: current_function
+                .get_nth_param(0)
+                .expect("Function needs at least one argument for the arena!")
+                .into_pointer_value(),
+        }
+    }
+}
+
+impl<'a> BodyCtx<'a> for FunctionCtx<'a> {
+    fn get_variable(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a> {
+        create_gep(producer, self.arena, &[index])
+    }
+
+    fn get_variable_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
+        self.arena.into()
+    }
+}
+
 pub struct FunctionLLVMIRProducer<'ctx: 'prod, 'prod> {
     parent: &'prod dyn LLVMIRProducer<'ctx>,
-    function_ctx: FunctionCtx<'ctx>,
-    current_function: FunctionValue<'ctx>,
+    ctx: FunctionCtx<'ctx>,
 }
 
 impl<'ctx, 'prod> FunctionLLVMIRProducer<'ctx, 'prod> {
@@ -61,11 +91,7 @@ impl<'ctx, 'prod> FunctionLLVMIRProducer<'ctx, 'prod> {
         producer: &'prod dyn LLVMIRProducer<'ctx>,
         current_function: FunctionValue<'ctx>,
     ) -> Self {
-        FunctionLLVMIRProducer {
-            parent: producer,
-            function_ctx: FunctionCtx::new(current_function),
-            current_function,
-        }
+        FunctionLLVMIRProducer { parent: producer, ctx: FunctionCtx::new(current_function) }
     }
 }
 
@@ -82,16 +108,16 @@ impl<'ctx, 'prod> LLVMIRProducer<'ctx> for FunctionLLVMIRProducer<'ctx, 'prod> {
         self.parent.set_current_bb(bb)
     }
 
-    fn template_ctx(&self) -> &TemplateCtx<'ctx> {
+    fn template_ctx(&self) -> &dyn TemplateCtx<'ctx> {
         self.parent.template_ctx()
     }
 
     fn body_ctx(&self) -> &dyn BodyCtx<'ctx> {
-        &self.function_ctx
+        &self.ctx
     }
 
     fn current_function(&self) -> FunctionValue<'ctx> {
-        self.current_function
+        self.ctx.current_function
     }
 
     fn builder(&self) -> &Builder<'ctx> {
@@ -107,31 +133,146 @@ impl<'ctx, 'prod> LLVMIRProducer<'ctx> for FunctionLLVMIRProducer<'ctx, 'prod> {
     }
 }
 
-pub struct FunctionCtx<'a> {
-    arena: PointerValue<'a>,
+struct ExtractedFunctionCtx<'a> {
+    current_function: FunctionValue<'a>,
+    // NOTE: The 'lvars' [0 x i256]* parameter must always be present (at position 0).
+    //  The 'signals' [0 x i256]* parameter (at position 1) is optional (to allow
+    //  this to handle the generated array index load functions for the unroller).
+    args: Vec<PointerValue<'a>>,
 }
 
-impl<'a> FunctionCtx<'a> {
-    pub fn new(current_function: FunctionValue<'a>) -> Self {
-        FunctionCtx {
-            arena: current_function
-                .get_nth_param(0)
-                .expect("Function needs at least one argument for the arena!")
-                .into_pointer_value(),
+impl<'a> ExtractedFunctionCtx<'a> {
+    fn new(current_function: FunctionValue<'a>) -> Self {
+        ExtractedFunctionCtx {
+            current_function,
+            args: current_function
+                .get_param_iter()
+                .map(|x| x.into_pointer_value())
+                .collect::<Vec<_>>(),
         }
     }
+
+    fn get_lvars_ptr(&self) -> PointerValue<'a> {
+        *self.args.get(0).expect("Function must have at least 1 argument for lvar array!")
+    }
+
+    /// Returns the optional 'signals' argument (position 1); panics if the function lacks it.
+    fn get_signals_ptr(&self) -> PointerValue<'a> {
+        let f = self.current_function;
+        *self.args.get(1).unwrap_or_else(|| panic!("No signals argument for {:?}", f.get_name()))
+    }
 }
 
-impl<'a> BodyCtx<'a> for FunctionCtx<'a> {
+impl<'a> BodyCtx<'a> for ExtractedFunctionCtx<'a> {
     fn get_variable(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         index: IntValue<'a>,
     ) -> AnyValueEnum<'a> {
-        create_gep(producer, self.arena, &[index])
+        //'gep' must read through the pointer with 0 and then index the array
+        create_gep(producer, self.get_lvars_ptr(), &[zero(producer), index])
     }
 
     fn get_variable_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
-        self.arena.into()
+        self.get_lvars_ptr().into()
+    }
+}
+
+impl<'a> TemplateCtx<'a> for ExtractedFunctionCtx<'a> {
+    fn load_subcmp(
+        &self,
+        _producer: &dyn LLVMIRProducer<'a>,
+        _id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a> {
+        unreachable!()
+    }
+
+    fn load_subcmp_addr(
+        &self,
+        _producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a> {
+        let num = id
+            .into_int_value()
+            .get_zero_extended_constant()
+            .expect("must reference a constant argument index");
+        *self.args.get(num as usize).expect("must reference a known argument index")
+    }
+
+    fn load_subcmp_counter(
+        &self,
+        _producer: &dyn LLVMIRProducer<'a>,
+        _id: AnyValueEnum<'a>,
+    ) -> Option<PointerValue<'a>> {
+        // Use None to force StoreBucket::produce_llvm_ir to skip counter increment.
+        None
+    }
+
+    fn get_signal(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a> {
+        //'gep' must read through the pointer with 0 and then index the array
+        create_gep(producer, self.get_signals_ptr(), &[zero(producer), index])
+    }
+
+    fn get_signal_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
+        self.get_signals_ptr().into()
+    }
+}
+
+pub struct ExtractedFunctionLLVMIRProducer<'ctx: 'prod, 'prod> {
+    parent: &'prod dyn LLVMIRProducer<'ctx>,
+    ctx: ExtractedFunctionCtx<'ctx>,
+}
+
+impl<'ctx, 'prod> ExtractedFunctionLLVMIRProducer<'ctx, 'prod> {
+    pub fn new(
+        producer: &'prod dyn LLVMIRProducer<'ctx>,
+        current_function: FunctionValue<'ctx>,
+    ) -> Self {
+        ExtractedFunctionLLVMIRProducer {
+            parent: producer,
+            ctx: ExtractedFunctionCtx::new(current_function),
+        }
+    }
+}
+
+impl<'ctx, 'prod> LLVMIRProducer<'ctx> for ExtractedFunctionLLVMIRProducer<'ctx, 'prod> {
+    fn llvm(&self) -> &LLVM<'ctx> {
+        self.parent.llvm()
+    }
+
+    fn context(&self) -> ContextRef<'ctx> {
+        self.parent.context()
+    }
+
+    fn set_current_bb(&self, bb: BasicBlock<'ctx>) {
+        self.parent.set_current_bb(bb)
+    }
+
+    fn template_ctx(&self) -> &dyn TemplateCtx<'ctx> {
+        &self.ctx
+    }
+
+    fn body_ctx(&self) -> &dyn BodyCtx<'ctx> {
+        &self.ctx
+    }
+
+    fn current_function(&self) -> FunctionValue<'ctx> {
+        self.ctx.current_function
+    }
+
+    fn builder(&self) -> &Builder<'ctx> {
+        self.parent.builder()
+    }
+
+    fn constant_fields(&self) -> &Vec<String> {
+        self.parent.constant_fields()
+    }
+
+    fn get_template_mem_arg(&self, _run_fn: FunctionValue<'ctx>) -> ArrayValue<'ctx> {
+        panic!("The function llvm producer can't extract the template argument of a run function!");
     }
 }
diff --git a/code_producers/src/llvm_elements/instructions.rs b/code_producers/src/llvm_elements/instructions.rs
index f1f85ada6..e5290c689 100644
--- a/code_producers/src/llvm_elements/instructions.rs
+++ b/code_producers/src/llvm_elements/instructions.rs
@@ -5,7 +5,7 @@ use inkwell::values::{
     AnyValue, AnyValueEnum, BasicMetadataValueEnum, BasicValue, BasicValueEnum, FunctionValue,
     InstructionOpcode, InstructionValue, IntMathValue, IntValue, PhiValue, PointerValue,
 };
-use crate::llvm_elements::{LLVMIRProducer};
+use crate::llvm_elements::LLVMIRProducer;
 use crate::llvm_elements::fr::{FR_MUL_FN_NAME, FR_LT_FN_NAME};
 use crate::llvm_elements::functions::create_bb;
 use crate::llvm_elements::types::{bigint_type, i32_type};
@@ -537,21 +537,22 @@ pub fn ensure_int_type_match<'a>(
     val: IntValue<'a>,
     ty: IntType<'a>,
 ) -> IntValue<'a> {
-    if val.get_type() == ty {
+    let val_ty = val.get_type();
+    if val_ty == ty {
         // No conversion needed
         val
-    } else if val.get_type() == bool_type(producer) {
+    } else if val_ty == bool_type(producer) {
         // Zero extend
         producer.llvm().builder.build_int_z_extend(val, ty, "")
     } else if ty == bool_type(producer) {
         // Convert to bool
         ensure_bool(producer, val).into_int_value()
+    } else if val_ty.get_bit_width() < ty.get_bit_width() {
+        producer.llvm().builder.build_int_s_extend(val, ty, "")
     } else {
         panic!(
             "Unhandled int conversion of value '{:?}': {:?} to {:?} not supported!",
-            val,
-            val.get_type(),
-            ty
+            val, val_ty, ty
         )
     }
 }
diff --git a/code_producers/src/llvm_elements/mod.rs b/code_producers/src/llvm_elements/mod.rs
index 278bac388..f57e13f26 100644
--- a/code_producers/src/llvm_elements/mod.rs
+++ b/code_producers/src/llvm_elements/mod.rs
@@ -11,8 +11,9 @@ use inkwell::builder::Builder;
 use inkwell::context::{Context, ContextRef};
 use inkwell::debug_info::{DebugInfoBuilder, DICompileUnit};
 use inkwell::module::Module;
+use inkwell::passes::PassManager;
 use inkwell::types::{AnyTypeEnum, BasicType, BasicTypeEnum, IntType};
-use inkwell::values::{ArrayValue, BasicMetadataValueEnum, BasicValueEnum, IntValue};
+use inkwell::values::{ArrayValue, BasicMetadataValueEnum, BasicValueEnum, IntValue, PointerValue};
 pub use inkwell::types::AnyType;
 pub use inkwell::values::{AnyValue, AnyValueEnum, FunctionValue, InstructionOpcode};
 pub use inkwell::debug_info::AsDIScope;
@@ -22,7 +23,6 @@ use program_structure::program_archive::ProgramArchive;
 use crate::components::TemplateInstanceIOMap;
 use crate::llvm_elements::types::bool_type;
 use crate::llvm_elements::instructions::create_alloca;
-use crate::llvm_elements::template::TemplateCtx;
 
 pub mod stdlib;
 pub mod template;
@@ -46,11 +46,44 @@ pub trait BodyCtx<'a> {
     fn get_variable_array(&self, producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a>;
 }
 
+pub trait TemplateCtx<'a> {
+    /// Returns the memory address of the subcomponent
+    fn load_subcmp(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a>;
+
+    /// Creates the necessary code to load a subcomponent given the expression used as id
+    fn load_subcmp_addr(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> PointerValue<'a>;
+
+    /// Creates the necessary code to load a subcomponent counter given the expression used as id
+    fn load_subcmp_counter(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        id: AnyValueEnum<'a>,
+    ) -> Option<PointerValue<'a>>;
+
+    /// Returns a pointer to the signal associated to the index
+    fn get_signal(
+        &self,
+        producer: &dyn LLVMIRProducer<'a>,
+        index: IntValue<'a>,
+    ) -> AnyValueEnum<'a>;
+
+    /// Returns a pointer to the signal array
+    fn get_signal_array(&self, producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a>;
+}
+
 pub trait LLVMIRProducer<'a> {
     fn llvm(&self) -> &LLVM<'a>;
     fn context(&self) -> ContextRef<'a>;
     fn set_current_bb(&self, bb: BasicBlock<'a>);
-    fn template_ctx(&self) -> &TemplateCtx<'a>;
+    fn template_ctx(&self) -> &dyn TemplateCtx<'a>;
     fn body_ctx(&self) -> &dyn BodyCtx<'a>;
     fn current_function(&self) -> FunctionValue<'a>;
     fn builder(&self) -> &Builder<'a>;
@@ -100,7 +133,7 @@ impl<'a> LLVMIRProducer<'a> for TopLevelLLVMIRProducer<'a> {
         self.llvm().builder.position_at_end(bb);
     }
 
-    fn template_ctx(&self) -> &TemplateCtx<'a> {
+    fn template_ctx(&self) -> &dyn TemplateCtx<'a> {
         panic!("The top level llvm producer does not hold a template context!");
     }
 
@@ -288,14 +321,18 @@ impl<'a> LLVM<'a> {
     }
 
     pub fn write_to_file(&self, path: &str) -> Result<(), ()> {
+        // Run LLVM IR inliner for the FR_IDENTITY_* and FR_INDEX_ARR_PTR functions
+        let pm = PassManager::create(());
+        pm.add_always_inliner_pass();
+        pm.run_on(&self.module);
+
         // Must finalize all debug info before running the verifier
         for dbg in self.debug.values() {
             dbg.0.finalize();
         }
         // Run module verification
         self.module.verify().map_err(|llvm_err| {
-            eprintln!("Generated LLVM:");
-            self.module.print_to_stderr();
+            self.dump_module_to_stderr();
             eprintln!(
                 "{}: {}",
                 Colour::Red.paint("LLVM Module verification failed"),
@@ -331,6 +368,11 @@ impl<'a> LLVM<'a> {
             );
         })
     }
+
+    pub fn dump_module_to_stderr(&self) {
+        eprintln!("Generated LLVM:");
+        self.module.print_to_stderr();
+    }
 }
 
 pub fn run_fn_name(name: String) -> String {
diff --git a/code_producers/src/llvm_elements/stdlib.rs b/code_producers/src/llvm_elements/stdlib.rs
index b6c347777..27546f463 100644
--- a/code_producers/src/llvm_elements/stdlib.rs
+++ b/code_producers/src/llvm_elements/stdlib.rs
@@ -2,6 +2,8 @@
 
 use crate::llvm_elements::LLVMIRProducer;
 
+//NOTE: LLVM identifiers can contain "." but circom identifiers cannot, which makes checking for this prefix unambiguous.
+pub const GENERATED_FN_PREFIX: &str = "..generated..";
 pub const CONSTRAINT_VALUES_FN_NAME: &str = "__constraint_values";
 pub const CONSTRAINT_VALUE_FN_NAME: &str = "__constraint_value";
 pub const ASSERT_FN_NAME: &str = "__assert";
diff --git a/code_producers/src/llvm_elements/template.rs b/code_producers/src/llvm_elements/template.rs
index 3152d6493..d23880676 100644
--- a/code_producers/src/llvm_elements/template.rs
+++ b/code_producers/src/llvm_elements/template.rs
@@ -1,90 +1,27 @@
+use std::default::Default;
 use inkwell::basic_block::BasicBlock;
 use inkwell::builder::Builder;
 use inkwell::context::ContextRef;
 use inkwell::types::{AnyType, BasicType, PointerType};
 use inkwell::values::{AnyValueEnum, ArrayValue, FunctionValue, IntValue, PointerValue};
-
-use crate::llvm_elements::{BodyCtx, LLVM, LLVMIRProducer};
+use crate::llvm_elements::{BodyCtx, LLVM, LLVMIRProducer, TemplateCtx};
 use crate::llvm_elements::instructions::{create_alloca, create_gep, create_load};
 use crate::llvm_elements::types::{bigint_type, i32_type};
 use crate::llvm_elements::values::{create_literal_u32, zero};
-use std::default::Default;
-
-pub struct TemplateLLVMIRProducer<'ctx: 'prod, 'prod> {
-    parent: &'prod dyn LLVMIRProducer<'ctx>,
-    template_ctx: TemplateCtx<'ctx>,
-}
-
-impl<'a, 'b> LLVMIRProducer<'a> for TemplateLLVMIRProducer<'a, 'b> {
-    fn llvm(&self) -> &LLVM<'a> {
-        self.parent.llvm()
-    }
-
-    fn context(&self) -> ContextRef<'a> {
-        self.parent.context()
-    }
-
-    fn set_current_bb(&self, bb: BasicBlock<'a>) {
-        self.parent.set_current_bb(bb)
-    }
-
-    fn template_ctx(&self) -> &TemplateCtx<'a> {
-        &self.template_ctx
-    }
 
-    fn body_ctx(&self) -> &dyn BodyCtx<'a> {
-        &self.template_ctx
-    }
-
-    fn current_function(&self) -> FunctionValue<'a> {
-        self.template_ctx.current_function
-    }
-
-    fn builder(&self) -> &Builder<'a> {
-        self.parent.builder()
-    }
-
-    fn constant_fields(&self) -> &Vec<String> {
-        self.parent.constant_fields()
-    }
-
-    fn get_template_mem_arg(&self, run_fn: FunctionValue<'a>) -> ArrayValue<'a> {
-        run_fn
-            .get_nth_param(self.template_ctx.signals_arg_offset as u32)
-            .unwrap()
-            .into_array_value()
-    }
-}
-
-impl<'a, 'b> TemplateLLVMIRProducer<'a, 'b> {
-    pub fn new(
-        parent: &'b dyn LLVMIRProducer<'a>,
-        stack_depth: usize,
-        number_subcmps: usize,
-        current_function: FunctionValue<'a>,
-        template_type: PointerType<'a>,
-        signals_arg_offset: usize,
-    ) -> Self {
-        TemplateLLVMIRProducer {
-            parent,
-            template_ctx: TemplateCtx::new(
-                parent,
-                stack_depth,
-                number_subcmps,
-                current_function,
-                template_type,
-                signals_arg_offset,
-            ),
-        }
-    }
+pub fn create_template_struct<'a>(
+    producer: &dyn LLVMIRProducer<'a>,
+    n_signals: usize,
+) -> PointerType<'a> {
+    bigint_type(producer).array_type(n_signals as u32).ptr_type(Default::default())
 }
 
-pub struct TemplateCtx<'a> {
-    pub stack: PointerValue<'a>,
+struct StdTemplateCtx<'a> {
+    stack: PointerValue<'a>,
     subcmps: PointerValue<'a>,
-    pub current_function: FunctionValue<'a>,
-    pub template_type: PointerType<'a>,
-    pub signals_arg_offset: usize,
+    current_function: FunctionValue<'a>,
+    template_type: PointerType<'a>,
+    signals_arg_offset: usize,
 }
 
 #[inline]
@@ -106,8 +43,8 @@ fn setup_stack<'a>(producer: &dyn LLVMIRProducer<'a>, stack_depth: usize) -> Poi
         .into_pointer_value()
 }
 
-impl<'a> TemplateCtx<'a> {
-    pub fn new(
+impl<'a> StdTemplateCtx<'a> {
+    fn new(
         producer: &dyn LLVMIRProducer<'a>,
         stack_depth: usize,
         number_subcmps: usize,
@@ -115,7 +52,7 @@ impl<'a> TemplateCtx<'a> {
         template_type: PointerType<'a>,
         signals_arg_offset: usize,
     ) -> Self {
-        TemplateCtx {
+        StdTemplateCtx {
             stack: setup_stack(producer, stack_depth),
             subcmps: setup_subcmps(producer, number_subcmps),
             current_function,
@@ -123,9 +60,10 @@ impl<'a> TemplateCtx<'a> {
             signals_arg_offset,
         }
     }
+}
 
-    /// Returns the memory address of the subcomponent
-    pub fn load_subcmp(
+impl<'a> TemplateCtx<'a> for StdTemplateCtx<'a> {
+    fn load_subcmp(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
@@ -134,8 +72,7 @@ impl<'a> TemplateCtx<'a> {
             .into_pointer_value()
     }
 
-    /// Creates the necessary code to load a subcomponent given the expression used as id
-    pub fn load_subcmp_addr(
+    fn load_subcmp_addr(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
@@ -149,22 +86,22 @@ impl<'a> TemplateCtx<'a> {
         create_load(producer, signals).into_pointer_value()
     }
 
-    /// Creates the necessary code to load a subcomponent counter given the expression used as id
-    pub fn load_subcmp_counter(
+    fn load_subcmp_counter(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         id: AnyValueEnum<'a>,
-    ) -> PointerValue<'a> {
-        create_gep(
-            producer,
-            self.subcmps,
-            &[zero(producer), id.into_int_value(), create_literal_u32(producer, 1)],
+    ) -> Option<PointerValue<'a>> {
+        Some(
+            create_gep(
+                producer,
+                self.subcmps,
+                &[zero(producer), id.into_int_value(), create_literal_u32(producer, 1)],
+            )
+            .into_pointer_value(),
         )
-        .into_pointer_value()
     }
 
-    /// Returns a pointer to the signal associated to the index
-    pub fn get_signal(
+    fn get_signal(
         &self,
         producer: &dyn LLVMIRProducer<'a>,
         index: IntValue<'a>,
@@ -173,14 +110,13 @@ impl<'a> TemplateCtx<'a> {
         create_gep(producer, signals.into_pointer_value(), &[zero(producer), index])
     }
 
-    /// Returns a pointer to the signal array
-    pub fn get_signal_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
+    fn get_signal_array(&self, _producer: &dyn LLVMIRProducer<'a>) -> AnyValueEnum<'a> {
         let signals = self.current_function.get_nth_param(self.signals_arg_offset as u32).unwrap();
         signals.into_pointer_value().into()
     }
 }
 
-impl<'a> BodyCtx<'a> for TemplateCtx<'a> {
+impl<'a> BodyCtx<'a> for StdTemplateCtx<'a> {
     /// Returns a reference to the local variable associated to the index
     fn get_variable(
         &self,
@@ -195,9 +131,68 @@ impl<'a> BodyCtx<'a> for TemplateCtx<'a> {
     }
 }
 
-pub fn create_template_struct<'a>(
-    producer: &dyn LLVMIRProducer<'a>,
-    n_signals: usize,
-) -> PointerType<'a> {
-    bigint_type(producer).array_type(n_signals as u32).ptr_type(Default::default())
+pub struct TemplateLLVMIRProducer<'ctx: 'prod, 'prod> {
+    parent: &'prod dyn LLVMIRProducer<'ctx>,
+    ctx: StdTemplateCtx<'ctx>,
+}
+
+impl<'a, 'b> LLVMIRProducer<'a> for TemplateLLVMIRProducer<'a, 'b> {
+    fn llvm(&self) -> &LLVM<'a> {
+        self.parent.llvm()
+    }
+
+    fn context(&self) -> ContextRef<'a> {
+        self.parent.context()
+    }
+
+    fn set_current_bb(&self, bb: BasicBlock<'a>) {
+        self.parent.set_current_bb(bb)
+    }
+
+    fn template_ctx(&self) -> &dyn TemplateCtx<'a> {
+        &self.ctx
+    }
+
+    fn body_ctx(&self) -> &dyn BodyCtx<'a> {
+        &self.ctx
+    }
+
+    fn current_function(&self) -> FunctionValue<'a> {
+        self.ctx.current_function
+    }
+
+    fn builder(&self) -> &Builder<'a> {
+        self.parent.builder()
+    }
+
+    fn constant_fields(&self) -> &Vec<String> {
+        self.parent.constant_fields()
+    }
+
+    fn get_template_mem_arg(&self, run_fn: FunctionValue<'a>) -> ArrayValue<'a> {
+        run_fn.get_nth_param(self.ctx.signals_arg_offset as u32).unwrap().into_array_value()
+    }
+}
+
+impl<'a, 'b> TemplateLLVMIRProducer<'a, 'b> {
+    pub fn new(
+        parent: &'b dyn LLVMIRProducer<'a>,
+        stack_depth: usize,
+        number_subcmps: usize,
+        current_function: FunctionValue<'a>,
+        template_type: PointerType<'a>,
+        signals_arg_offset: usize,
+    ) -> Self {
+        TemplateLLVMIRProducer {
+            parent,
+            ctx: StdTemplateCtx::new(
+                parent,
+                stack_depth,
+                number_subcmps,
+                current_function,
+                template_type,
+                signals_arg_offset,
+            ),
+        }
+    }
 }
diff --git a/code_producers/src/llvm_elements/values.rs b/code_producers/src/llvm_elements/values.rs
index e1549acbc..987b4125b 100644
--- a/code_producers/src/llvm_elements/values.rs
+++ b/code_producers/src/llvm_elements/values.rs
@@ -1,7 +1,6 @@
 use inkwell::types::StringRadix;
 use inkwell::values::{AnyValue, AnyValueEnum, IntValue};
-
-use crate::llvm_elements::{LLVMIRProducer};
+use crate::llvm_elements::LLVMIRProducer;
 use crate::llvm_elements::types::bigint_type;
 
 pub fn create_literal_u32<'a>(producer: &dyn LLVMIRProducer<'a>, val: u64) -> IntValue<'a> {
diff --git a/compiler/src/circuit_design/circuit.rs b/compiler/src/circuit_design/circuit.rs
index 58dc940e3..658ca9e25 100644
--- a/compiler/src/circuit_design/circuit.rs
+++ b/compiler/src/circuit_design/circuit.rs
@@ -7,13 +7,15 @@ use crate::hir::very_concrete_program::VCP;
 use crate::intermediate_representation::ir_interface::ObtainMeta;
 use crate::translating_traits::*;
 use code_producers::c_elements::*;
-use code_producers::llvm_elements::array_switch::load_array_switch;
-use code_producers::wasm_elements::*;
 use code_producers::llvm_elements::*;
+use code_producers::llvm_elements::array_switch::load_array_switch;
 use code_producers::llvm_elements::fr::load_fr;
-use code_producers::llvm_elements::functions::{create_function, FunctionLLVMIRProducer};
-use code_producers::llvm_elements::stdlib::load_stdlib;
+use code_producers::llvm_elements::functions::{
+    create_function, FunctionLLVMIRProducer, ExtractedFunctionLLVMIRProducer,
+};
+use code_producers::llvm_elements::stdlib::{load_stdlib, GENERATED_FN_PREFIX};
 use code_producers::llvm_elements::types::{bigint_type, void_type};
+use code_producers::wasm_elements::*;
 use program_structure::program_archive::ProgramArchive;
 
 pub struct CompilationFlags {
@@ -43,7 +45,10 @@ impl Default for Circuit {
 }
 
 impl WriteLLVMIR for Circuit {
-    fn produce_llvm_ir<'a, 'b>(&self, producer: &'b dyn LLVMIRProducer<'a>) -> Option<LLVMInstruction<'a>> {
+    fn produce_llvm_ir<'a, 'b>(
+        &self,
+        producer: &'b dyn LLVMIRProducer<'a>,
+    ) -> Option<LLVMInstruction<'a>> {
         // Code for prelude
 
         // Code for standard library?
@@ -52,8 +57,11 @@ impl WriteLLVMIR for Circuit {
 
         // Generate all the switch functions
         let mut ranges = HashSet::new();
-        let mappings = [&self.llvm_data.signal_index_mapping, &self.llvm_data.variable_index_mapping, &self.llvm_data.component_index_mapping];
-
+        let mappings = [
+            &self.llvm_data.signal_index_mapping,
+            &self.llvm_data.variable_index_mapping,
+            &self.llvm_data.component_index_mapping,
+        ];
         for mapping in mappings {
             for range_mapping in mapping.values() {
                 for range in range_mapping.values() {
@@ -61,7 +69,6 @@ impl WriteLLVMIR for Circuit {
                 }
             }
         }
-
         for range in ranges {
             load_array_switch(producer, range);
         }
@@ -70,37 +77,97 @@ impl WriteLLVMIR for Circuit {
         let mut funcs = HashMap::new();
         for f in &self.functions {
             let name = f.header.as_str();
-            let arena_ty = bigint_type(producer).ptr_type(Default::default());
+            let param_types = if name.starts_with(GENERATED_FN_PREFIX) {
+                // Use the FunctionCodeInfo instance to generate the vector of parameter types.
+                let mut types = vec![];
+                for p in &f.params {
+                    // This section is a little more complicated than desired because IntType and ArrayType do
+                    //  not have a common Trait that defines the `array_type` and `ptr_type` member functions.
+                    let ty = match &p.length[..] {
+                        // [] -> i256*
+                        [] => bigint_type(producer).ptr_type(Default::default()),
+                        // [A] -> [A x i256]*
+                        [a] => {
+                            bigint_type(producer).array_type(*a as u32).ptr_type(Default::default())
+                        }
+                        // [A,B,C,...] -> [... x [C x [B x [A x i256]]]]*
+                        [a, rest @ ..] => {
+                            let mut temp = bigint_type(producer).array_type(*a as u32);
+                            for size in rest {
+                                temp = temp.array_type(*size as u32);
+                            }
+                            temp.ptr_type(Default::default())
+                        }
+                    };
+                    types.push(ty.into());
+                }
+                types
+            } else {
+                vec![bigint_type(producer).ptr_type(Default::default()).into()]
+            };
             let function = create_function(
                 producer,
                 f.get_source_file_id(),
                 f.get_line(),
                 f.name.as_str(),
                 name,
-                if f.returns.is_empty() || (f.returns.len() == 1 && *f.returns.get(0).unwrap() == 1)
-                {
-                    bigint_type(producer).fn_type(&[arena_ty.into()], false)
+                if f.returns.len() == 1 {
+                    let single_size = *f.returns.get(0).unwrap();
+                    if single_size == 0 {
+                        // single dimension of size 0 indicates a [0 x i256]* return type
+                        bigint_type(producer)
+                            .array_type(0)
+                            .ptr_type(Default::default())
+                            .fn_type(&param_types, false)
+                    } else if single_size == 1 {
+                        // single dimension of size 1 is a scalar
+                        bigint_type(producer).fn_type(&param_types, false)
+                    } else {
+                        // single dimension size>1 must return via pointer argument
+                        void_type(producer).fn_type(&param_types, false)
+                    }
                 } else {
-                    void_type(producer).fn_type(&[arena_ty.into()], false)
+                    // multiple dimensions must return via pointer argument
+                    //  and zero dimensions indicates void return
+                    void_type(producer).fn_type(&param_types, false)
                 },
             );
+
+            // Preserve parameter names (generated functions only; source functions take just 1 argument)
+            if name.starts_with(GENERATED_FN_PREFIX) {
+                for (i, p) in f.params.iter().enumerate() {
+                    function.get_nth_param(i as u32).unwrap().set_name(&p.name);
+                }
+            }
+
             funcs.insert(name, function);
         }
 
-        // Code for the functions
+        // Code for the functions (except for generated functions)
+        let mut generated_functions = vec![];
         for f in &self.functions {
-            let function_producer = FunctionLLVMIRProducer::new(producer, funcs[f.header.as_str()]);
-            Self::manage_debug_loc_from_curr(&function_producer, f.as_ref());
-            f.produce_llvm_ir(&function_producer);
+            if f.header.starts_with(GENERATED_FN_PREFIX) {
+                // Hold for later because the body could reference templates
+                //  and the LLVM functions for templates were not pre-defined.
+                generated_functions.push(f);
+            } else {
+                let current_function = funcs[f.header.as_str()];
+                f.produce_llvm_ir(&FunctionLLVMIRProducer::new(producer, current_function));
+            }
         }
 
         // Code for the templates
         for t in &self.templates {
-            println!("Generating code for {}", t.header);
-            // code.append(&mut t.produce_llvm_ir(producer));
             t.produce_llvm_ir(producer);
         }
 
+        // Code for generated functions
+        for f in generated_functions {
+            assert!(f.header.starts_with(GENERATED_FN_PREFIX));
+            let current_function = funcs[f.header.as_str()];
+            f.produce_llvm_ir(&ExtractedFunctionLLVMIRProducer::new(producer, current_function));
+        }
+
         // Code for prologue
 
         None // No need to return at this level
diff --git a/compiler/src/circuit_design/function.rs b/compiler/src/circuit_design/function.rs
index 997079b54..96ce82739 100644
--- a/compiler/src/circuit_design/function.rs
+++ b/compiler/src/circuit_design/function.rs
@@ -5,7 +5,7 @@ use crate::intermediate_representation::ir_interface::ObtainMeta;
 use crate::translating_traits::*;
 use code_producers::c_elements::*;
 use code_producers::llvm_elements::{LLVMInstruction, LLVMIRProducer};
-use code_producers::llvm_elements::functions::{create_bb};
+use code_producers::llvm_elements::functions::create_bb;
 use code_producers::llvm_elements::instructions::create_br;
 
 use code_producers::wasm_elements::*;
@@ -49,6 +49,8 @@ impl ToString for FunctionCodeInfo {
 
 impl WriteLLVMIR for FunctionCodeInfo {
     fn produce_llvm_ir<'ctx, 'prod>(&self, producer: &'prod dyn LLVMIRProducer<'ctx>) -> Option<LLVMInstruction<'ctx>> {
+        println!("Generating code for {}", self.header);
+        Self::manage_debug_loc_from_curr(producer, self);
         let function = producer.current_function();
         let main = create_bb(producer, function, self.header.as_str());
         producer.set_current_bb(main);
diff --git a/compiler/src/circuit_design/template.rs b/compiler/src/circuit_design/template.rs
index 5a80126ad..c0605aed2 100644
--- a/compiler/src/circuit_design/template.rs
+++ b/compiler/src/circuit_design/template.rs
@@ -59,6 +59,7 @@ impl ToString for TemplateCodeInfo {
 
 impl WriteLLVMIR for TemplateCodeInfo {
     fn produce_llvm_ir<'ctx, 'prod>(&self, producer: &'prod dyn LLVMIRProducer<'ctx>) -> Option<LLVMInstruction<'ctx>> {
+        println!("Generating code for {}", self.header);
         let void = void_type(producer);
         let n_signals = self.number_of_inputs + self.number_of_outputs + self.number_of_intermediates;
         let template_struct = create_template_struct(producer, n_signals);
diff --git a/compiler/src/intermediate_representation/address_type.rs b/compiler/src/intermediate_representation/address_type.rs
index 1501d54c2..b31a6afa4 100644
--- a/compiler/src/intermediate_representation/address_type.rs
+++ b/compiler/src/intermediate_representation/address_type.rs
@@ -11,14 +11,20 @@ pub enum StatusInput {
 #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
 pub enum InputInformation {
     NoInput,
-    Input {status: StatusInput},
+    Input { status: StatusInput },
 }
 
 #[derive(Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
 pub enum AddressType {
     Variable,
     Signal,
-    SubcmpSignal { cmp_address: InstructionPointer, uniform_parallel_value: Option<bool>, is_output: bool, input_information: InputInformation },
+    SubcmpSignal {
+        cmp_address: InstructionPointer,
+        uniform_parallel_value: Option<bool>,
+        is_output: bool,
+        input_information: InputInformation,
+        counter_override: bool,
+    },
 }
 
 impl ToString for AddressType {
@@ -27,7 +33,11 @@ impl ToString for AddressType {
         match self {
             Variable => "VARIABLE".to_string(),
             Signal => "SIGNAL".to_string(),
-            SubcmpSignal { cmp_address, .. } => format!("SUBCOMPONENT:{}", cmp_address.to_string()),
+            SubcmpSignal { cmp_address, counter_override, .. } => format!(
+                "{}:{}",
+                if *counter_override { "SUBCOMP_COUNTER" } else { "SUBCOMPONENT" },
+                cmp_address.to_string()
+            ),
         }
     }
 }
@@ -38,10 +48,12 @@ impl ToSExp for AddressType {
         match self {
             Variable => SExp::Atom("VARIABLE".to_string()),
             Signal => SExp::Atom("SIGNAL".to_string()),
-            SubcmpSignal { cmp_address, .. } => SExp::List(vec![
-                SExp::Atom("SUBCOMPONENT".to_string()),
-                cmp_address.to_sexp()
-            ])
+            SubcmpSignal { cmp_address, counter_override, .. } => SExp::List(vec![
+                SExp::Atom(
+                    if *counter_override { "SUBCOMP_COUNTER" } else { "SUBCOMPONENT" }.to_string(),
+                ),
+                cmp_address.to_sexp(),
+            ]),
         }
     }
 }
@@ -50,8 +62,8 @@ impl UpdateId for AddressType {
     fn update_id(&mut self) {
         use AddressType::*;
         match self {
-            SubcmpSignal { cmp_address, ..} => cmp_address.update_id(),
+            SubcmpSignal { cmp_address, .. } => cmp_address.update_id(),
             _ => {}
         }
     }
-}
\ No newline at end of file
+}
diff --git a/compiler/src/intermediate_representation/block_bucket.rs b/compiler/src/intermediate_representation/block_bucket.rs
index 12a1f5623..c95e478c1 100644
--- a/compiler/src/intermediate_representation/block_bucket.rs
+++ b/compiler/src/intermediate_representation/block_bucket.rs
@@ -13,6 +13,7 @@ pub struct BlockBucket {
     pub message_id: usize,
     pub body: InstructionList,
     pub n_iters: usize,
+    pub label: String,
 }
 
 impl IntoInstruction for BlockBucket {
diff --git a/compiler/src/intermediate_representation/call_bucket.rs b/compiler/src/intermediate_representation/call_bucket.rs
index cf5bb8e10..f59b8b3d0 100644
--- a/compiler/src/intermediate_representation/call_bucket.rs
+++ b/compiler/src/intermediate_representation/call_bucket.rs
@@ -107,93 +107,128 @@ impl WriteLLVMIR for CallBucket {
     ) -> Option<LLVMInstruction<'a>> {
         Self::manage_debug_loc_from_curr(producer, self);
 
-        // Create array with arena_size size
-        let bigint_arr = bigint_type(producer).array_type(self.arena_size as u32);
-        let arena =
-            create_alloca(producer, bigint_arr.into(), format!("{}_arena", self.symbol).as_str());
+        // Check arena_size==0 which indicates arguments should not be placed into arena
+        let arena_size = self.arena_size;
+        if arena_size == 0 {
+            let mut args = vec![];
+            for arg in self.arguments.iter() {
+                args.push(to_basic_metadata_enum(
+                    arg.produce_llvm_ir(producer).expect("Call arguments must produce a value!"),
+                ));
+            }
+            let call_ret_val = create_call(producer, self.symbol.as_str(), &args);
+            return Some(call_ret_val);
+        } else {
+            // Create array with arena_size size
+            let arena = create_alloca(
+                producer,
+                bigint_type(producer).array_type(arena_size as u32).into(),
+                format!("{}_arena", self.symbol).as_str(),
+            );
 
-        // Get the offsets based on the sizes of the arguments
-        let offsets: Vec<usize> = self.argument_types.iter().scan(0, |state, arg_ty| {
-            let curr_offset = *state;
-            *state = *state + arg_ty.size;
-            Some(curr_offset)
-        }).collect();
+            // Get the offsets based on the sizes of the arguments
+            let offsets: Vec<usize> = self
+                .argument_types
+                .iter()
+                .scan(0, |state, arg_ty| {
+                    let curr_offset = *state;
+                    *state = *state + arg_ty.size;
+                    Some(curr_offset)
+                })
+                .collect();
 
-        // Copy arguments into elements of the arena by indexing order (arg 0 -> arena 0, arg 1 -> arena 1, etc)
-        for ((arg, arg_ty), offset) in self
-            .arguments
-            .iter()
-            .zip(&self.argument_types)
-            .zip(offsets)
-        {
-            let i = create_literal_u32(producer, offset as u64);
-            let ptr = create_gep(producer, arena.into_pointer_value(), &[zero(producer), i]).into_pointer_value();
-            if arg_ty.size > 1 {
-                let src_arg = match arg.as_ref() {
-                    Instruction::Load(v) => {
-                        let index = v.src.produce_llvm_ir(producer).expect("We need to produce some kind of instruction!").into_int_value();
-                        let gep = match &v.address_type {
-                            AddressType::Variable => producer.body_ctx().get_variable(producer, index),
-                            AddressType::Signal => producer.template_ctx().get_signal(producer, index),
-                            AddressType::SubcmpSignal { cmp_address, ..  } => {
-                                let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-                                let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                                create_gep(producer, subcmp, &[zero(producer), index])
+            // Copy arguments into elements of the arena by indexing order (arg 0 -> arena 0, arg 1 -> arena 1, etc)
+            for ((arg, arg_ty), offset) in
+                self.arguments.iter().zip(&self.argument_types).zip(offsets)
+            {
+                let i = create_literal_u32(producer, offset as u64);
+                let ptr = create_gep(producer, arena.into_pointer_value(), &[zero(producer), i])
+                    .into_pointer_value();
+                if arg_ty.size > 1 {
+                    let src_arg = match arg.as_ref() {
+                        Instruction::Load(v) => {
+                            let index = v
+                                .src
+                                .produce_llvm_ir(producer)
+                                .expect("We need to produce some kind of instruction!")
+                                .into_int_value();
+                            let gep = match &v.address_type {
+                                AddressType::Variable => {
+                                    producer.body_ctx().get_variable(producer, index)
+                                }
+                                AddressType::Signal => {
+                                    producer.template_ctx().get_signal(producer, index)
+                                }
+                                AddressType::SubcmpSignal { cmp_address, .. } => {
+                                    let addr = cmp_address.produce_llvm_ir(producer).expect(
+                                        "The address of a subcomponent must yield a value!",
+                                    );
+                                    let subcmp =
+                                        producer.template_ctx().load_subcmp_addr(producer, addr);
+                                    create_gep(producer, subcmp, &[zero(producer), index])
+                                }
                             }
-                        }.into_pointer_value();
-                        gep
-                    },
-                    _ => unreachable!(),
-                };
-                let len_arg = create_literal_u32(producer, arg_ty.size as u64);
-                create_call(producer, FR_ARRAY_COPY_FN_NAME, &[src_arg.into(), ptr.into(), len_arg.into()]);
-            } else {
-                let arg_load = arg.produce_llvm_ir(producer).expect("Call arguments must produce a value!");
-                create_store(producer, ptr, arg_load);
+                            .into_pointer_value();
+                            gep
+                        }
+                        _ => unreachable!(),
+                    };
+                    let len_arg = create_literal_u32(producer, arg_ty.size as u64);
+                    create_call(
+                        producer,
+                        FR_ARRAY_COPY_FN_NAME,
+                        &[src_arg.into(), ptr.into(), len_arg.into()],
+                    );
+                } else {
+                    let arg_load = arg
+                        .produce_llvm_ir(producer)
+                        .expect("Call arguments must produce a value!");
+                    create_store(producer, ptr, arg_load);
+                }
             }
-        }
 
-        let arena = pointer_cast(
-            producer,
-            arena.into_pointer_value(),
-            bigint_type(producer).ptr_type(Default::default()),
-        );
+            let arena = pointer_cast(
+                producer,
+                arena.into_pointer_value(),
+                bigint_type(producer).ptr_type(Default::default()),
+            );
 
-        // Call function passing the array as argument
-        let call_ret_val = create_call(
-            producer,
-            self.symbol.as_str(),
-            &[to_basic_metadata_enum(arena.into())],
-        );
+            // Call function passing the array as argument
+            let call_ret_val = create_call(
+                producer,
+                self.symbol.as_str(),
+                &[to_basic_metadata_enum(arena.into())],
+            );
 
-        match &self.return_info {
-            ReturnType::Intermediate { op_aux_no } => {
-                todo!("ReturnType::Intermediate {:#?}", op_aux_no);
-            }
-            ReturnType::Final(data) => {
-                let size = data.context.size;
-                let source_of_store = if size == 1 {
-                    //For scalar returns, store the returned value to
-                    //  the proper index in the current function's arena.
-                    call_ret_val
-                } else {
-                    //For array returns, copy the data from the callee arena to the caller arena.
-                    create_gep(
+            match &self.return_info {
+                ReturnType::Intermediate { op_aux_no } => {
+                    todo!("ReturnType::Intermediate {:#?}", op_aux_no);
+                }
+                ReturnType::Final(data) => {
+                    let size = data.context.size;
+                    let source_of_store = if size == 1 {
+                        //For scalar returns, store the returned value to
+                        //  the proper index in the current function's arena.
+                        call_ret_val
+                    } else {
+                        //For array returns, copy the data from the callee arena to the caller arena.
+                        create_gep(
+                            producer,
+                            arena,
+                            &[i32_type(producer).const_int(self.arguments.len() as u64, false)],
+                        )
+                    };
+                    return StoreBucket::produce_llvm_ir(
                         producer,
-                        arena,
-                        &[i32_type(producer).const_int(self.arguments.len() as u64, false)],
-                    )
-                };
-                return StoreBucket::produce_llvm_ir(
-                    producer,
-                    Either::Left(source_of_store),
-                    &data.dest,
-                    &data.dest_address_type,
-                    InstrContext { size },
-                    &None,
-                );
-            }
-        };
+                        Either::Left(source_of_store),
+                        &data.dest,
+                        &data.dest_address_type,
+                        InstrContext { size },
+                        &None,
+                    );
+                }
+            };
+        }
     }
 }
 
diff --git a/compiler/src/intermediate_representation/ir_interface.rs b/compiler/src/intermediate_representation/ir_interface.rs
index e5eb88cde..96c6e3a50 100644
--- a/compiler/src/intermediate_representation/ir_interface.rs
+++ b/compiler/src/intermediate_representation/ir_interface.rs
@@ -13,7 +13,7 @@ pub use super::store_bucket::StoreBucket;
 pub use super::log_bucket::LogBucketArg;
 pub use super::types::{InstrContext, ValueType};
 pub use super::value_bucket::ValueBucket;
-pub use super::constraint_bucket::{ConstraintBucket};
+pub use super::constraint_bucket::ConstraintBucket;
 pub use super::block_bucket::BlockBucket;
 pub use super::nop_bucket::NopBucket;
 
@@ -258,24 +258,25 @@ impl Instruction {
     pub fn label_name(&self, idx: u32) -> String {
         use Instruction::*;
         match self {
-            Value(_v) => format!("value{}", idx),
-            Load(_v) => format!("load{}", idx),
-            Store(_v) => format!("store{}", idx),
-            Compute(_v) => format!("compute{}", idx),
-            Call(_v) => format!("call{}", idx),
-            Branch(_v) => format!("branch{}", idx),
-            Return(_v) => format!("return{}", idx),
-            Loop(_v) => format!("loop{}", idx),
-            Assert(_v) => format!("assert{}", idx),
-            CreateCmp(_v) => format!("create_cmp{}", idx),
-            Log(_v) => format!("log{}", idx),
+            Value(_) => format!("value{}", idx),
+            Load(_) => format!("load{}", idx),
+            Store(_) => format!("store{}", idx),
+            Compute(_) => format!("compute{}", idx),
+            Call(_) => format!("call{}", idx),
+            Branch(_) => format!("branch{}", idx),
+            Return(_) => format!("return{}", idx),
+            Loop(_) => format!("loop{}", idx),
+            Assert(_) => format!("assert{}", idx),
+            CreateCmp(_) => format!("create_cmp{}", idx),
+            Log(_) => format!("log{}", idx),
             // We use the label name of the wrapped instruction
             Constraint(v) => match v {
                 ConstraintBucket::Substitution(i) => i,
-                ConstraintBucket::Equality(i) => i
-            }.label_name(idx),
-            Block(_) => format!("unrolled_loop{}", idx),
-            Nop(_) => format!("nop{}", idx)
+                ConstraintBucket::Equality(i) => i,
+            }
+            .label_name(idx),
+            Block(BlockBucket { label, .. }) => format!("{}{}", label, idx),
+            Nop(_) => format!("nop{}", idx),
         }
     }
 }
diff --git a/compiler/src/intermediate_representation/load_bucket.rs b/compiler/src/intermediate_representation/load_bucket.rs
index dac5f25f0..564715379 100644
--- a/compiler/src/intermediate_representation/load_bucket.rs
+++ b/compiler/src/intermediate_representation/load_bucket.rs
@@ -87,28 +87,44 @@ impl WriteLLVMIR for LoadBucket {
         // If we have bounds for an unknown index, we will get the base address and let the function check the bounds
         let load = match &self.bounded_fn {
             Some(name) => {
-                let arr_ptr = match &self.address_type {
-                    AddressType::Variable => producer.body_ctx().get_variable_array(producer),
-                    AddressType::Signal => producer.template_ctx().get_signal_array(producer),
-                    AddressType::SubcmpSignal { cmp_address, .. } => {
-                        let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-                        let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        create_gep(producer, subcmp, &[zero(producer)])
-                    },
-                }.into_pointer_value();
-                let arr_ptr = pointer_cast(producer, arr_ptr, array_ptr_ty(producer));
-                create_call(producer, name.as_str(), &[arr_ptr.into(), index.into()])
+                let get_ptr = || {
+                    let arr_ptr = match &self.address_type {
+                        AddressType::Variable => producer.body_ctx().get_variable_array(producer),
+                        AddressType::Signal => producer.template_ctx().get_signal_array(producer),
+                        AddressType::SubcmpSignal { cmp_address, counter_override, .. } => {
+                            let addr = cmp_address.produce_llvm_ir(producer)
+                                .expect("The address of a subcomponent must yield a value!");
+                            if *counter_override {
+                                return producer.template_ctx().load_subcmp_counter(producer, addr).expect("could not find counter!")
+                            } else {
+                                let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
+                                create_gep(producer, subcmp, &[zero(producer)])
+                            }
+                        }
+                    };
+                    pointer_cast(producer, arr_ptr.into_pointer_value(), array_ptr_ty(producer))
+                };
+                create_call(producer, name.as_str(), &[get_ptr().into(), index.into()])
             },
             None => {
                 let gep = match &self.address_type {
-                    AddressType::Variable => producer.body_ctx().get_variable(producer, index),
-                    AddressType::Signal => producer.template_ctx().get_signal(producer, index),
-                    AddressType::SubcmpSignal { cmp_address, ..  } => {
+                    AddressType::Variable => producer.body_ctx().get_variable(producer, index).into_pointer_value(),
+                    AddressType::Signal => producer.template_ctx().get_signal(producer, index).into_pointer_value(),
+                    AddressType::SubcmpSignal { cmp_address, counter_override, ..  } => {
                         let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
-                        let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        create_gep(producer, subcmp, &[zero(producer), index])
+                        if *counter_override {
+                            producer.template_ctx().load_subcmp_counter(producer, addr).expect("could not find counter!")
+                        } else {
+                            let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
+                            if subcmp.get_type().get_element_type().is_array_type() {
+                                create_gep(producer, subcmp, &[zero(producer), index]).into_pointer_value()
+                            } else {
+                                assert_eq!(zero(producer), index);
+                                create_gep(producer, subcmp, &[index]).into_pointer_value()
+                            }
+                        }
                     }
-                }.into_pointer_value();
+                };
                 create_load(producer, gep)
             },
         };
diff --git a/compiler/src/intermediate_representation/store_bucket.rs b/compiler/src/intermediate_representation/store_bucket.rs
index 4bc0c936c..0787c9d38 100644
--- a/compiler/src/intermediate_representation/store_bucket.rs
+++ b/compiler/src/intermediate_representation/store_bucket.rs
@@ -126,7 +126,12 @@ impl StoreBucket{
                     AddressType::SubcmpSignal { cmp_address, .. } => {
                         let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
                         let subcmp = producer.template_ctx().load_subcmp_addr(producer, addr);
-                        create_gep(producer, subcmp, &[zero(producer), dest_index])
+                        if subcmp.get_type().get_element_type().is_array_type() {
+                            create_gep(producer, subcmp, &[zero(producer), dest_index])
+                        } else {
+                            assert_eq!(zero(producer), dest_index);
+                            create_gep(producer, subcmp, &[dest_index])
+                        }
                     }
                 }.into_pointer_value();
                 if context.size > 1 {
@@ -173,19 +178,21 @@ impl StoreBucket{
         if let AddressType::SubcmpSignal { cmp_address, .. } = &dest_address_type {
             let addr = cmp_address.produce_llvm_ir(producer).expect("The address of a subcomponent must yield a value!");
             let counter = producer.template_ctx().load_subcmp_counter(producer, addr);
-            let value = create_load_with_name(producer, counter, "load.subcmp.counter");
-            let new_value = create_sub_with_name(producer, value.into_int_value(), create_literal_u32(producer, context.size as u64), "decrement.counter");
-            create_store(producer, counter, new_value);
+            if let Some(counter) = counter {
+                let value = create_load_with_name(producer, counter, "load.subcmp.counter");
+                let new_value = create_sub_with_name(producer, value.into_int_value(), create_literal_u32(producer, context.size as u64), "decrement.counter");
+                create_store(producer, counter, new_value);
+            }
         }
 
-        let sub_cmp_name = match &dest {
-            LocationRule::Indexed { template_header, .. } => template_header.clone(),
-            LocationRule::Mapped { .. } => None
-        };
         // If the input information is unknown add a check that checks the counter and if its zero call the subcomponent
         // If its last just call run directly
         if let AddressType::SubcmpSignal { input_information, cmp_address, .. } = &dest_address_type {
             if let InputInformation::Input { status } = input_information {
+                let sub_cmp_name = match &dest {
+                    LocationRule::Indexed { template_header, .. } => template_header.clone(),
+                    LocationRule::Mapped { .. } => None
+                };
                 match status {
                     StatusInput::Last => {
                         let run_fn = run_fn_name(sub_cmp_name.expect("Could not get the name of the subcomponent"));
diff --git a/compiler/src/intermediate_representation/translate.rs b/compiler/src/intermediate_representation/translate.rs
index 2d10881d3..7eef90661 100644
--- a/compiler/src/intermediate_representation/translate.rs
+++ b/compiler/src/intermediate_representation/translate.rs
@@ -1196,10 +1196,11 @@ impl ProcessedSymbol {
                 cmp_address: compute_full_address(state, self.symbol, self.before_signal, expr.get_meta()),
                 is_output: self.signal_type.unwrap() == SignalType::Output,
                 uniform_parallel_value: state.component_to_parallel.get(&self.name).unwrap().uniform_parallel_value,
-                input_information : match self.signal_type.unwrap() {
+                input_information: match self.signal_type.unwrap() {
                     SignalType::Input => InputInformation::Input { status: StatusInput:: Unknown},
                     _ => InputInformation::NoInput,
                 },
+                counter_override: false,
             };
             FinalData {
                 context: InstrContext { size: self.length },
@@ -1240,10 +1241,11 @@ impl ProcessedSymbol {
                 cmp_address: compute_full_address(state, self.symbol, self.before_signal, stmt.get_meta()),
                 uniform_parallel_value: state.component_to_parallel.get(&self.name).unwrap().uniform_parallel_value,
                 is_output: self.signal_type.unwrap() == SignalType::Output,
-                input_information : match self.signal_type.unwrap() {
+                input_information: match self.signal_type.unwrap() {
                     SignalType::Input => InputInformation::Input { status:StatusInput:: Unknown},
                     _ => InputInformation::NoInput,
                 },
+                counter_override: false,
             };
             StoreBucket {
                 id: new_id(),
@@ -1286,10 +1288,11 @@ impl ProcessedSymbol {
                 cmp_address: compute_full_address(state, self.symbol, self.before_signal, expr.get_meta()),
                 uniform_parallel_value: state.component_to_parallel.get(&self.name).unwrap().uniform_parallel_value,
                 is_output: self.signal_type.unwrap() == SignalType::Output,
-                input_information : match self.signal_type.unwrap() {
+                input_information: match self.signal_type.unwrap() {
                     SignalType::Input => InputInformation::Input { status: StatusInput:: Unknown},
                     _ => InputInformation::NoInput,
                 },
+                counter_override: false,
             };
             LoadBucket {
                 id: new_id(),
diff --git a/compiler/src/ir_processing/reduce_stack.rs b/compiler/src/ir_processing/reduce_stack.rs
index f4909322a..3ce9f9dbe 100644
--- a/compiler/src/ir_processing/reduce_stack.rs
+++ b/compiler/src/ir_processing/reduce_stack.rs
@@ -164,9 +164,9 @@ pub fn reduce_address_type(at: AddressType) -> AddressType {
     match at {
         Variable => Variable,
         Signal => Signal,
-        SubcmpSignal { cmp_address, uniform_parallel_value, is_output, input_information } => {
+        SubcmpSignal { cmp_address, uniform_parallel_value, is_output, input_information, counter_override} => {
             let cmp_address = Allocate::allocate(reduce_instruction(*cmp_address));
-            SubcmpSignal { cmp_address, uniform_parallel_value, is_output, input_information }
+            SubcmpSignal { cmp_address, uniform_parallel_value, is_output, input_information, counter_override}
         }
     }
 }