From 3758610c77d235335077bc706ed09945bc75d565 Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Wed, 29 Jan 2025 22:15:20 +0000
Subject: [PATCH 1/9] initial logic for attributing more useless instructions
 to unroll more loops

---
 .../src/ssa/opt/loop_invariant.rs             |   7 +-
 .../noirc_evaluator/src/ssa/opt/unrolling.rs  | 173 ++++++++++++++++--
 2 files changed, 160 insertions(+), 20 deletions(-)
diff --git a/compiler/noirc_evaluator/src/ssa/opt/loop_invariant.rs b/compiler/noirc_evaluator/src/ssa/opt/loop_invariant.rs
index 1e2e783d516..169729a0f12 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/loop_invariant.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/loop_invariant.rs
@@ -60,6 +60,11 @@ impl Loops {
 
         context.map_dependent_instructions();
         context.inserter.map_data_bus_in_place();
+
+        // let new_values = context.loop_invariants.iter().map(|value| context.inserter.resolve(*value)).collect::<Vec<_>>();
+        // dbg!(new_values.clone());
+        // dbg!(context.defined_in_loop.clone());
+        // dbg!(context.loop_invariants.clone());
     }
 }
 
@@ -78,7 +83,7 @@ impl Loop {
     ///     jmpif v5 then: b3, else: b2
     /// ```
     /// In the example above, `v1` is the induction variable
-    fn get_induction_variable(&self, function: &Function) -> ValueId {
+    pub(crate) fn get_induction_variable(&self, function: &Function) -> ValueId {
         function.dfg.block_parameters(self.header)[0]
     }
 }
diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
index eb0bbd8c532..ea1d803cbc1 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
@@ -21,7 +21,6 @@
 use std::collections::BTreeSet;
 
 use acvm::{acir::AcirField, FieldElement};
-use im::HashSet;
 
 use crate::{
     brillig::{
@@ -45,7 +44,7 @@ use crate::{
         ssa_gen::Ssa,
     },
 };
-use fxhash::FxHashMap as HashMap;
+use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet};
 
 impl Ssa {
     /// Loop unrolling can return errors, since ACIR functions need to be fully unrolled.
@@ -77,6 +76,7 @@ impl Ssa {
             // more finessing to convince the borrow checker that it's okay to share a read-only reference
             // to the globals and a mutable reference to the function at the same time, both part of the `Ssa`.
             if has_unrolled && is_brillig {
+                dbg!(max_bytecode_increase_percent.clone());
                 if let Some(max_incr_pct) = max_bytecode_increase_percent {
                     if global_cache.is_none() {
                         let globals = (*function.dfg.globals).clone();
@@ -159,10 +159,10 @@ pub(super) struct Loop {
 pub(super) struct Loops {
     /// The loops that failed to be unrolled so that we do not try to unroll them again.
     /// Each loop is identified by its header block id.
-    failed_to_unroll: HashSet<BasicBlockId>,
+    failed_to_unroll: im::HashSet<BasicBlockId>,
 
     pub(super) yet_to_unroll: Vec<Loop>,
-    modified_blocks: HashSet<BasicBlockId>,
+    modified_blocks: im::HashSet<BasicBlockId>,
     pub(super) cfg: ControlFlowGraph,
 }
 
@@ -221,10 +221,12 @@ impl Loops {
         // their loop range. We will start popping loops from the back.
         loops.sort_by_key(|loop_| loop_.blocks.len());
 
+        // dbg!(loops.clone());
+
         Self {
-            failed_to_unroll: HashSet::default(),
+            failed_to_unroll: im::HashSet::default(),
             yet_to_unroll: loops,
-            modified_blocks: HashSet::default(),
+            modified_blocks: im::HashSet::default(),
             cfg,
         }
     }
@@ -587,7 +589,7 @@ impl Loop {
         &self,
         function: &Function,
         cfg: &ControlFlowGraph,
-    ) -> Option<HashSet<ValueId>> {
+    ) -> Option<im::HashSet<ValueId>> {
         // We need to traverse blocks from the pre-header up to the block entry point.
         let pre_header = self.get_pre_header(function, cfg).ok()?;
         let function_entry = function.entry_block();
@@ -618,10 +620,11 @@ impl Loop {
     fn count_loads_and_stores(
         &self,
         function: &Function,
-        refs: &HashSet<ValueId>,
+        refs: &im::HashSet<ValueId>,
     ) -> (usize, usize) {
         let mut loads = 0;
         let mut stores = 0;
+        // let mut
         for block in &self.blocks {
             for instruction in function.dfg[*block].instructions() {
                 match &function.dfg[*instruction] {
@@ -674,6 +677,103 @@ impl Loop {
         self.boilerplate_stats(function, cfg).map(|s| s.is_small()).unwrap_or_default()
     }
 
+    fn values_defined_in_loop(&self, function: &Function) -> HashSet<ValueId> {
+        let mut defined_in_loop = HashSet::default();
+        for block in self.blocks.iter() {
+            let params = function.dfg.block_parameters(*block);
+            defined_in_loop.extend(params);
+            for instruction_id in function.dfg[*block].instructions() {
+                let results = function.dfg.instruction_results(*instruction_id);
+                defined_in_loop.extend(results);
+            }
+        }
+        defined_in_loop
+    }
+
+    /// Given the values defined inside the loop, we should be able to determine which instructions
+    /// are "useless" inside of a loop.
+    /// Certain instructions are "useless" instruction when they have certain properties:
+    /// All values are either:
+    /// - Constant
+    /// - Induction variable
+    /// - Or not defined in the loop
+    fn redefine_useful_results(
+        &self,
+        function: &Function,
+        mut defined_in_loop: HashSet<ValueId>,
+        refs: &im::HashSet<ValueId>,
+    ) -> usize {
+        let mut useless_instructions = 0;
+        let induction_var = self.get_induction_variable(function);
+        let mut new_induction_var = induction_var;
+        for block in &self.blocks {
+            for instruction in function.dfg[*block].instructions() {
+                let results = function.dfg.instruction_results(*instruction);
+                let instruction = &function.dfg[*instruction];
+                // TODO: unify how all these instructions are checked
+                match instruction {
+                    Instruction::Load { address } if refs.contains(address) => {
+                        if !defined_in_loop.contains(&address) {
+                            defined_in_loop.remove(&results[0]);
+                        }
+                    }
+                    Instruction::Store { value, .. } => {
+                        if *value == new_induction_var || *value == induction_var {
+                            dbg!("got here");
+                            return 0;
+                        }
+                    }
+                    Instruction::ArrayGet { array, index } => {
+                        let index_is_useless = induction_var == *index
+                            || function.dfg.is_constant(*index)
+                            || !defined_in_loop.contains(index);
+                        if !defined_in_loop.contains(array) && index_is_useless {
+                            defined_in_loop.remove(&results[0]);
+                            useless_instructions += 1;
+                        }
+                    }
+                    Instruction::ArraySet { array, index, value, mutable } => {
+                        let index_is_useless = induction_var == *index
+                            || function.dfg.is_constant(*index)
+                            || !defined_in_loop.contains(index);
+                        if !defined_in_loop.contains(array)
+                            && index_is_useless
+                            && !defined_in_loop.contains(value)
+                        {
+                            defined_in_loop.remove(&results[0]);
+                            useless_instructions += 1;
+                        }
+                    }
+                    Instruction::Binary(_) => {
+                        // TODO: need to check each of these values better
+                        // It is "useless" when:
+                        //  - We have a constant
+                        //  - It is an induction variable
+                        //  - It has not been defined in the loop
+                        let mut is_useless = true;
+                        instruction.for_each_value(|value| {
+                            is_useless &= !defined_in_loop.contains(&value)
+                                || value == induction_var
+                                || function.dfg.is_constant(value);
+                        });
+                        if is_useless {
+                            defined_in_loop.remove(&results[0]);
+                            useless_instructions += 1;
+                        }
+                    }
+                    Instruction::Cast(value, _) => {
+                        if *value == induction_var {
+                            dbg!("got here");
+                            new_induction_var = results[0];
+                        }
+                    }
+                    _ => {}
+                }
+            }
+        }
+        useless_instructions
+    }
+
     /// Collect boilerplate stats if we can figure out the upper and lower bounds of the loop,
     /// and the loop doesn't have multiple back-edges from breaks and continues.
     fn boilerplate_stats(
@@ -686,16 +786,42 @@ impl Loop {
         let upper = upper.try_to_u64()?;
         let refs = self.find_pre_header_reference_values(function, cfg)?;
 
+        let induction_var = self.get_induction_variable(function);
+        let mut uses_induction_var_outside = false;
+        for block in self.blocks.iter() {
+            let successors = function.dfg[*block].successors();
+            for successor in successors {
+                if !self.blocks.contains(&successor) {
+                    for instruction in function.dfg[successor].instructions() {
+                        let instruction = &function.dfg[*instruction];
+                        instruction.for_each_value(|value| {
+                            if value == induction_var {
+                                uses_induction_var_outside = true;
+                            }
+                        });
+                    }
+                }
+            }
+        }
+
+        let defined_in_loop = self.values_defined_in_loop(function);
+        let useless_instructions = if uses_induction_var_outside {
+            0
+        } else {
+            self.redefine_useful_results(function, defined_in_loop, &refs)
+        };
+        // let useless_instructions = self.redefine_useful_results(function, defined_in_loop, &refs);
+        dbg!(self.blocks.clone());
+        dbg!(useless_instructions);
         let (loads, stores) = self.count_loads_and_stores(function, &refs);
-        let increments = self.count_induction_increments(function);
         let all_instructions = self.count_all_instructions(function);
 
         Some(BoilerplateStats {
             iterations: (upper - lower) as usize,
             loads,
             stores,
-            increments,
             all_instructions,
+            useless_instructions,
         })
     }
 }
@@ -725,11 +851,13 @@ struct BoilerplateStats {
     loads: usize,
     /// Number of stores into pre-header references in the loop.
     stores: usize,
-    /// Number of increments to the induction variable (might be duplicated).
-    increments: usize,
     /// Number of instructions in the loop, including boilerplate,
     /// but excluding the boilerplate which is outside the loop.
     all_instructions: usize,
+    /// "Useless instructions"
+    /// TODO: find a better term for this
+    /// We do not have a loop invariant, but something that becomes simpler upon unrolling
+    useless_instructions: usize,
 }
 
 impl BoilerplateStats {
@@ -742,12 +870,15 @@ impl BoilerplateStats {
     /// Estimated number of _useful_ instructions, which is the ones in the loop
     /// minus all in-loop boilerplate.
     fn useful_instructions(&self) -> usize {
-        // Two jumps + plus the comparison with the upper bound
-        let boilerplate = 3;
+        // Two jumps
+        // The comparison with the upper bound and the increments to the induction variable
+        // are included in the `useless_instructions`
+        let boilerplate = 2;
         // Be conservative and only assume that mem2reg gets rid of load followed by store.
         // NB we have not checked that these are actual pairs.
         let load_and_store = self.loads.min(self.stores) * 2;
-        self.all_instructions - self.increments - load_and_store - boilerplate
+        self.all_instructions - load_and_store - boilerplate - self.useless_instructions
+        // self.all_instructions - self.increments - load_and_store - boilerplate
     }
 
     /// Estimated number of instructions if we unroll the loop.
@@ -760,6 +891,10 @@ impl BoilerplateStats {
     /// the blocks in tact with all the boilerplate involved in jumping, and the extra
     /// reference access instructions.
     fn is_small(&self) -> bool {
+        // dbg!(self.useful_instructions());
+        // dbg!(self.iterations);
+        // dbg!(self.unrolled_instructions());
+        // dbg!(self.baseline_instructions());
         self.unrolled_instructions() < self.baseline_instructions()
     }
 }
@@ -818,7 +953,7 @@ struct LoopIteration<'f> {
 
     /// Maps unrolled block ids back to the original source block ids
     original_blocks: HashMap<BasicBlockId, BasicBlockId>,
-    visited_blocks: HashSet<BasicBlockId>,
+    visited_blocks: im::HashSet<BasicBlockId>,
 
     insert_block: BasicBlockId,
     source_block: BasicBlockId,
@@ -844,7 +979,7 @@ impl<'f> LoopIteration<'f> {
             source_block,
             blocks: HashMap::default(),
             original_blocks: HashMap::default(),
-            visited_blocks: HashSet::default(),
+            visited_blocks: im::HashSet::default(),
             induction_value: None,
         }
     }
@@ -1211,7 +1346,7 @@ mod tests {
         let stats = loop0_stats(&ssa);
         assert_eq!(stats.iterations, 4);
         assert_eq!(stats.all_instructions, 2 + 5); // Instructions in b1 and b3
-        assert_eq!(stats.increments, 1);
+                                                   // assert_eq!(stats.increments, 1);
         assert_eq!(stats.loads, 1);
         assert_eq!(stats.stores, 1);
         assert_eq!(stats.useful_instructions(), 1); // Adding to sum
@@ -1225,7 +1360,7 @@ mod tests {
         let stats = loop0_stats(&ssa);
         assert_eq!(stats.iterations, 3);
         assert_eq!(stats.all_instructions, 2 + 8); // Instructions in b1 and b3
-        assert_eq!(stats.increments, 2);
+                                                   // assert_eq!(stats.increments, 2);
         assert_eq!(stats.loads, 1);
         assert_eq!(stats.stores, 1);
         assert_eq!(stats.useful_instructions(), 3); // array get, add, array set

From c779b6cfc0ead5e5d2aa5bcbb3e65f02002af1f2 Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Thu, 30 Jan 2025 13:21:16 +0000
Subject: [PATCH 2/9] initial brillig more aggressiveness loop unrolling
 work,still a draft and needs tests

---
 compiler/noirc_evaluator/src/ssa.rs           |  6 +-
 .../noirc_evaluator/src/ssa/opt/unrolling.rs  | 81 ++++++++-----------
 .../global_var_regression_simple/src/main.nr  | 24 +++---
 .../unroll_loop_regression/Nargo.toml         |  6 ++
 .../unroll_loop_regression/Prover.toml        |  1 +
 .../unroll_loop_regression/src/main.nr        | 27 +++++++
 tooling/nargo_cli/build.rs                    |  4 +-
 7 files changed, 85 insertions(+), 64 deletions(-)
 create mode 100644 test_programs/execution_success/unroll_loop_regression/Nargo.toml
 create mode 100644 test_programs/execution_success/unroll_loop_regression/Prover.toml
 create mode 100644 test_programs/execution_success/unroll_loop_regression/src/main.nr

diff --git a/compiler/noirc_evaluator/src/ssa.rs b/compiler/noirc_evaluator/src/ssa.rs
index eb93ef47ea3..b0cfb5dd023 100644
--- a/compiler/noirc_evaluator/src/ssa.rs
+++ b/compiler/noirc_evaluator/src/ssa.rs
@@ -174,7 +174,11 @@ fn optimize_all(builder: SsaBuilder, options: &SsaEvaluatorOptions) -> Result<Ss
         .run_pass(Ssa::loop_invariant_code_motion, "Loop Invariant Code Motion")
         .try_run_pass(
             |ssa| ssa.unroll_loops_iteratively(options.max_bytecode_increase_percent),
-            "Unrolling",
+            "Unrolling (1st)",
+        )?
+        .try_run_pass(
+            |ssa| ssa.unroll_loops_iteratively(options.max_bytecode_increase_percent),
+            "Unrolling (2nd)",
         )?
         .run_pass(Ssa::simplify_cfg, "Simplifying (2nd)")
         .run_pass(Ssa::mem2reg, "Mem2Reg (3rd)")
diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
index 356561a7391..3f358b8d9b1 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
@@ -76,7 +76,9 @@ impl Ssa {
             // more finessing to convince the borrow checker that it's okay to share a read-only reference
             // to the globals and a mutable reference to the function at the same time, both part of the `Ssa`.
             if has_unrolled && is_brillig {
-                dbg!(max_bytecode_increase_percent.clone());
+                // TODO: As we unroll more loops, this is potentially going to lead to panics
+                // when we have a non-aggressiveness inliner as the compiler is not going to have yet resolved
+                // certain intrinsics which we expect to be entirely known at compile-time (e.g. DerivePedersenGenerators).
                 if let Some(max_incr_pct) = max_bytecode_increase_percent {
                     if global_cache.is_none() {
                         let globals = (*function.dfg.globals).clone();
@@ -144,6 +146,7 @@ impl Function {
     }
 }
 
+#[derive(Debug, Clone)]
 pub(super) struct Loop {
     /// The header block of a loop is the block which dominates all the
     /// other blocks in the loop.
@@ -222,8 +225,6 @@ impl Loops {
         // their loop range. We will start popping loops from the back.
         loops.sort_by_key(|loop_| loop_.blocks.len());
 
-        // dbg!(loops.clone());
-
         Self {
             failed_to_unroll: im::HashSet::default(),
             yet_to_unroll: loops,
@@ -654,22 +655,21 @@ impl Loop {
     /// Count the number of increments to the induction variable.
     /// It should be one, but it can be duplicated.
     /// The increment should be in the block where the back-edge was found.
-    fn count_induction_increments(&self, function: &Function) -> usize {
-        let back = &function.dfg[self.back_edge_start];
-        let header = &function.dfg[self.header];
-        let induction_var = header.parameters()[0];
-
-        back.instructions()
-            .iter()
-            .filter(|instruction| {
-                let instruction = &function.dfg[**instruction];
-                matches!(instruction,
-                    Instruction::Binary(Binary { lhs, operator: BinaryOp::Add { .. }, rhs: _ })
-                        if *lhs == induction_var
-                )
-            })
-            .count()
-    }
+    // fn count_induction_increments(&self, function: &Function) -> usize {
+    //     let back = &function.dfg[self.back_edge_start];
+    //     let header = &function.dfg[self.header];
+    //     let induction_var = header.parameters()[0];
+    //     back.instructions()
+    //         .iter()
+    //         .filter(|instruction| {
+    //             let instruction = &function.dfg[**instruction];
+    //             matches!(instruction,
+    //                 Instruction::Binary(Binary { lhs, operator: BinaryOp::Add { .. }, rhs: _ })
+    //                     if *lhs == induction_var
+    //             )
+    //         })
+    //         .count()
+    // }
 
     /// Decide if this loop is small enough that it can be inlined in a way that the number
     /// of unrolled instructions times the number of iterations would result in smaller bytecode
@@ -678,6 +678,8 @@ impl Loop {
         self.boilerplate_stats(function, cfg).map(|s| s.is_small()).unwrap_or_default()
     }
 
+    // TODO: unify this method with the one in the loop invariant pass
+    // probably can unify some other code too
     fn values_defined_in_loop(&self, function: &Function) -> HashSet<ValueId> {
         let mut defined_in_loop = HashSet::default();
         for block in self.blocks.iter() {
@@ -706,24 +708,17 @@ impl Loop {
     ) -> usize {
         let mut useless_instructions = 0;
         let induction_var = self.get_induction_variable(function);
-        let mut new_induction_var = induction_var;
         for block in &self.blocks {
             for instruction in function.dfg[*block].instructions() {
                 let results = function.dfg.instruction_results(*instruction);
                 let instruction = &function.dfg[*instruction];
-                // TODO: unify how all these instructions are checked
+                // TODO: unify how all these instructions are analyzed
                 match instruction {
                     Instruction::Load { address } if refs.contains(address) => {
-                        if !defined_in_loop.contains(&address) {
+                        if !defined_in_loop.contains(address) {
                             defined_in_loop.remove(&results[0]);
                         }
                     }
-                    Instruction::Store { value, .. } => {
-                        if *value == new_induction_var || *value == induction_var {
-                            dbg!("got here");
-                            return 0;
-                        }
-                    }
                     Instruction::ArrayGet { array, index } => {
                         let index_is_useless = induction_var == *index
                             || function.dfg.is_constant(*index)
@@ -733,7 +728,7 @@ impl Loop {
                             useless_instructions += 1;
                         }
                     }
-                    Instruction::ArraySet { array, index, value, mutable } => {
+                    Instruction::ArraySet { array, index, value, .. } => {
                         let index_is_useless = induction_var == *index
                             || function.dfg.is_constant(*index)
                             || !defined_in_loop.contains(index);
@@ -746,11 +741,6 @@ impl Loop {
                         }
                     }
                     Instruction::Binary(_) => {
-                        // TODO: need to check each of these values better
-                        // It is "useless" when:
-                        //  - We have a constant
-                        //  - It is an induction variable
-                        //  - It has not been defined in the loop
                         let mut is_useless = true;
                         instruction.for_each_value(|value| {
                             is_useless &= !defined_in_loop.contains(&value)
@@ -762,12 +752,6 @@ impl Loop {
                             useless_instructions += 1;
                         }
                     }
-                    Instruction::Cast(value, _) => {
-                        if *value == induction_var {
-                            dbg!("got here");
-                            new_induction_var = results[0];
-                        }
-                    }
                     _ => {}
                 }
             }
@@ -787,6 +771,10 @@ impl Loop {
         let upper = upper.try_to_u64()?;
         let refs = self.find_pre_header_reference_values(function, cfg)?;
 
+        // If we have a break block, we can potentially directly use the induction variable in that break.
+        // If we then unroll the loop, the induction variable will not exist anymore.
+        // TODO: we should appropriately unroll and account for the break
+        // TODO 2: move this logic out into its own method
         let induction_var = self.get_induction_variable(function);
         let mut uses_induction_var_outside = false;
         for block in self.blocks.iter() {
@@ -806,14 +794,13 @@ impl Loop {
         }
 
         let defined_in_loop = self.values_defined_in_loop(function);
+
         let useless_instructions = if uses_induction_var_outside {
             0
         } else {
             self.redefine_useful_results(function, defined_in_loop, &refs)
         };
-        // let useless_instructions = self.redefine_useful_results(function, defined_in_loop, &refs);
-        dbg!(self.blocks.clone());
-        dbg!(useless_instructions);
+
         let (loads, stores) = self.count_loads_and_stores(function, &refs);
         let all_instructions = self.count_all_instructions(function);
 
@@ -871,7 +858,8 @@ impl BoilerplateStats {
     /// Estimated number of _useful_ instructions, which is the ones in the loop
     /// minus all in-loop boilerplate.
     fn useful_instructions(&self) -> usize {
-        // Two jumps
+        // Boilerplate only includes two jumps for induction variable comparison
+        // and the back edge jump to the header.
         // The comparison with the upper bound and the increments to the induction variable
         // are included in the `useless_instructions`
         let boilerplate = 2;
@@ -879,7 +867,6 @@ impl BoilerplateStats {
         // NB we have not checked that these are actual pairs.
         let load_and_store = self.loads.min(self.stores) * 2;
         self.all_instructions - load_and_store - boilerplate - self.useless_instructions
-        // self.all_instructions - self.increments - load_and_store - boilerplate
     }
 
     /// Estimated number of instructions if we unroll the loop.
@@ -892,10 +879,6 @@ impl BoilerplateStats {
     /// the blocks in tact with all the boilerplate involved in jumping, and the extra
     /// reference access instructions.
     fn is_small(&self) -> bool {
-        // dbg!(self.useful_instructions());
-        // dbg!(self.iterations);
-        // dbg!(self.unrolled_instructions());
-        // dbg!(self.baseline_instructions());
         self.unrolled_instructions() < self.baseline_instructions()
     }
 }
diff --git a/test_programs/execution_success/global_var_regression_simple/src/main.nr b/test_programs/execution_success/global_var_regression_simple/src/main.nr
index b1bf753a73c..c437f5f8db4 100644
--- a/test_programs/execution_success/global_var_regression_simple/src/main.nr
+++ b/test_programs/execution_success/global_var_regression_simple/src/main.nr
@@ -7,19 +7,19 @@ fn main(x: Field, y: pub Field) {
             acc += EXPONENTIATE[i][j];
         }
     }
-    assert(!acc.lt(x));
+    // assert(!acc.lt(x));
     assert(x != y);
 
-    dummy_again(x, y);
+    // dummy_again(x, y);
 }
 
-fn dummy_again(x: Field, y: Field) {
-    let mut acc: Field = 0;
-    for i in 0..2 {
-        for j in 0..2 {
-            acc += EXPONENTIATE[i][j];
-        }
-    }
-    assert(!acc.lt(x));
-    assert(x != y);
-}
+// fn dummy_again(x: Field, y: Field) {
+//     let mut acc: Field = 0;
+//     for i in 0..2 {
+//         for j in 0..2 {
+//             acc += EXPONENTIATE[i][j];
+//         }
+//     }
+//     assert(!acc.lt(x));
+//     assert(x != y);
+// }
diff --git a/test_programs/execution_success/unroll_loop_regression/Nargo.toml b/test_programs/execution_success/unroll_loop_regression/Nargo.toml
new file mode 100644
index 00000000000..1f055d9de5c
--- /dev/null
+++ b/test_programs/execution_success/unroll_loop_regression/Nargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "unroll_loop_regression"
+type = "bin"
+authors = [""]
+
+[dependencies]
\ No newline at end of file
diff --git a/test_programs/execution_success/unroll_loop_regression/Prover.toml b/test_programs/execution_success/unroll_loop_regression/Prover.toml
new file mode 100644
index 00000000000..5ca6e3c8812
--- /dev/null
+++ b/test_programs/execution_success/unroll_loop_regression/Prover.toml
@@ -0,0 +1 @@
+fracs = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
\ No newline at end of file
diff --git a/test_programs/execution_success/unroll_loop_regression/src/main.nr b/test_programs/execution_success/unroll_loop_regression/src/main.nr
new file mode 100644
index 00000000000..294eabfee63
--- /dev/null
+++ b/test_programs/execution_success/unroll_loop_regression/src/main.nr
@@ -0,0 +1,27 @@
+global ROOTS: [Field; 16] = [1, 2, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, 15, 16];
+
+unconstrained fn main(
+    fracs: [Field; 16],
+) -> pub [Field; 2] {
+    let mut partial_sums: [Field; 2] = std::mem::zeroed();
+
+    let mut partial_sum: Field = 0;
+    for i in 0..8 {
+        let summand = ROOTS[i] * fracs[i];
+        partial_sum = partial_sum + summand;
+    }
+    partial_sums[0] = partial_sum;
+
+    let NUM_PARTIAL_SUMS = 2;
+    for i in 1..NUM_PARTIAL_SUMS {
+        let mut partial_sum: Field = partial_sums[i - 1];
+        for j in 0..8 {
+            let k = i * 8 + j;
+            let summand = ROOTS[k]* fracs[k];
+            partial_sum = partial_sum + summand;
+        }
+        partial_sums[i] = partial_sum;
+    }
+
+    partial_sums
+}
\ No newline at end of file
diff --git a/tooling/nargo_cli/build.rs b/tooling/nargo_cli/build.rs
index 5e101bc0483..e6559fb5bc0 100644
--- a/tooling/nargo_cli/build.rs
+++ b/tooling/nargo_cli/build.rs
@@ -198,8 +198,8 @@ fn test_{test_name}(force_brillig: ForceBrillig, inliner_aggressiveness: Inliner
         nargo.arg("--force-brillig");
 
         // Set the maximum increase so that part of the optimization is exercised (it might fail).
-        nargo.arg("--max-bytecode-increase-percent");
-        nargo.arg("50");
+        // nargo.arg("--max-bytecode-increase-percent");
+        // nargo.arg("50");
     }}
 
     {test_content}

From f21ee1ae92a9539db400d0a0a210ee94a9013b85 Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Thu, 30 Jan 2025 13:24:55 +0000
Subject: [PATCH 3/9] restore global_var_regression_simple test

---
 .../global_var_regression_simple/src/main.nr  | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/test_programs/execution_success/global_var_regression_simple/src/main.nr b/test_programs/execution_success/global_var_regression_simple/src/main.nr
index c437f5f8db4..b1bf753a73c 100644
--- a/test_programs/execution_success/global_var_regression_simple/src/main.nr
+++ b/test_programs/execution_success/global_var_regression_simple/src/main.nr
@@ -7,19 +7,19 @@ fn main(x: Field, y: pub Field) {
             acc += EXPONENTIATE[i][j];
         }
     }
-    // assert(!acc.lt(x));
+    assert(!acc.lt(x));
     assert(x != y);
 
-    // dummy_again(x, y);
+    dummy_again(x, y);
 }
 
-// fn dummy_again(x: Field, y: Field) {
-//     let mut acc: Field = 0;
-//     for i in 0..2 {
-//         for j in 0..2 {
-//             acc += EXPONENTIATE[i][j];
-//         }
-//     }
-//     assert(!acc.lt(x));
-//     assert(x != y);
-// }
+fn dummy_again(x: Field, y: Field) {
+    let mut acc: Field = 0;
+    for i in 0..2 {
+        for j in 0..2 {
+            acc += EXPONENTIATE[i][j];
+        }
+    }
+    assert(!acc.lt(x));
+    assert(x != y);
+}

From 892c528f33548d6dce55d0ec236277a94ea150b0 Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Thu, 30 Jan 2025 13:30:04 +0000
Subject: [PATCH 4/9] nargo fmt

---
 .../unroll_loop_regression/src/main.nr                 | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/test_programs/execution_success/unroll_loop_regression/src/main.nr b/test_programs/execution_success/unroll_loop_regression/src/main.nr
index 294eabfee63..e0dfe23c087 100644
--- a/test_programs/execution_success/unroll_loop_regression/src/main.nr
+++ b/test_programs/execution_success/unroll_loop_regression/src/main.nr
@@ -1,8 +1,6 @@
-global ROOTS: [Field; 16] = [1, 2, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, 15, 16];
+global ROOTS: [Field; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
 
-unconstrained fn main(
-    fracs: [Field; 16],
-) -> pub [Field; 2] {
+unconstrained fn main(fracs: [Field; 16]) -> pub [Field; 2] {
     let mut partial_sums: [Field; 2] = std::mem::zeroed();
 
     let mut partial_sum: Field = 0;
@@ -17,11 +15,11 @@ unconstrained fn main(
         let mut partial_sum: Field = partial_sums[i - 1];
         for j in 0..8 {
             let k = i * 8 + j;
-            let summand = ROOTS[k]* fracs[k];
+            let summand = ROOTS[k] * fracs[k];
             partial_sum = partial_sum + summand;
         }
         partial_sums[i] = partial_sum;
     }
 
     partial_sums
-}
\ No newline at end of file
+}

From 735064038dec2a71473b6dca90fb316382d3e308 Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Thu, 30 Jan 2025 19:05:43 +0000
Subject: [PATCH 5/9] use num ssa instructions for bytecode size increase

---
 .../noirc_evaluator/src/ssa/ir/function.rs    | 10 +++++
 .../noirc_evaluator/src/ssa/opt/unrolling.rs  | 40 ++-----------------
 tooling/nargo_cli/build.rs                    |  4 +-
 3 files changed, 15 insertions(+), 39 deletions(-)

diff --git a/compiler/noirc_evaluator/src/ssa/ir/function.rs b/compiler/noirc_evaluator/src/ssa/ir/function.rs
index 6a0659e81bf..e7748b5f13f 100644
--- a/compiler/noirc_evaluator/src/ssa/ir/function.rs
+++ b/compiler/noirc_evaluator/src/ssa/ir/function.rs
@@ -211,6 +211,16 @@ impl Function {
 
         unreachable!("SSA Function {} has no reachable return instruction!", self.id())
     }
+
+    pub(crate) fn num_instructions(&self) -> usize {
+        self.reachable_blocks()
+            .iter()
+            .map(|block| {
+                let block = &self.dfg[*block];
+                block.instructions().len() + block.terminator().is_some() as usize
+            })
+            .sum()
+    }
 }
 
 impl Clone for Function {
diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
index 3f358b8d9b1..430cddab1e4 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
@@ -23,10 +23,6 @@ use std::collections::BTreeSet;
 use acvm::{acir::AcirField, FieldElement};
 
 use crate::{
-    brillig::{
-        brillig_gen::{brillig_globals::convert_ssa_globals, convert_ssa_function},
-        brillig_ir::brillig_variable::BrilligVariable,
-    },
     errors::RuntimeError,
     ssa::{
         ir::{
@@ -59,7 +55,7 @@ impl Ssa {
         mut self,
         max_bytecode_increase_percent: Option<i32>,
     ) -> Result<Ssa, RuntimeError> {
-        let mut global_cache = None;
+        // let mut global_cache = None;
 
         for function in self.functions.values_mut() {
             let is_brillig = function.runtime().is_brillig();
@@ -80,20 +76,9 @@ impl Ssa {
                 // when we have a non-aggressiveness inliner as the compiler is not going to have yet resolved
                 // certain intrinsics which we expect to be entirely known at compile-time (e.g. DerivePedersenGenerators).
                 if let Some(max_incr_pct) = max_bytecode_increase_percent {
-                    if global_cache.is_none() {
-                        let globals = (*function.dfg.globals).clone();
-                        let used_globals = &globals.values_iter().map(|(id, _)| id).collect();
-                        let globals_dfg = DataFlowGraph::from(globals);
-                        // DIE is run at the end of our SSA optimizations, so we mark all globals as in use here.
-                        let (_, brillig_globals, _) =
-                            convert_ssa_globals(false, &globals_dfg, used_globals, function.id());
-                        global_cache = Some(brillig_globals);
-                    }
-                    let brillig_globals = global_cache.as_ref().unwrap();
-
                     let orig_function = orig_function.expect("took snapshot to compare");
-                    let new_size = brillig_bytecode_size(function, brillig_globals);
-                    let orig_size = brillig_bytecode_size(&orig_function, brillig_globals);
+                    let new_size = function.num_instructions();
+                    let orig_size = function.num_instructions();
                     if !is_new_size_ok(orig_size, new_size, max_incr_pct) {
                         *function = orig_function;
                     }
@@ -1140,25 +1125,6 @@ fn simplify_between_unrolls(function: &mut Function) {
     function.mem2reg();
 }
 
-/// Convert the function to Brillig bytecode and return the resulting size.
-fn brillig_bytecode_size(
-    function: &Function,
-    globals: &HashMap<ValueId, BrilligVariable>,
-) -> usize {
-    // We need to do some SSA passes in order for the conversion to be able to go ahead,
-    // otherwise we can hit `unreachable!()` instructions in `convert_ssa_instruction`.
-    // Creating a clone so as not to modify the originals.
-    let mut temp = function.clone();
-
-    // Might as well give it the best chance.
-    simplify_between_unrolls(&mut temp);
-
-    // This is to try to prevent hitting ICE.
-    temp.dead_instruction_elimination(false, true);
-
-    convert_ssa_function(&temp, false, globals).byte_code.len()
-}
-
 /// Decide if the new bytecode size is acceptable, compared to the original.
 ///
 /// The maximum increase can be expressed as a negative value if we demand a decrease.
diff --git a/tooling/nargo_cli/build.rs b/tooling/nargo_cli/build.rs
index e6559fb5bc0..5e101bc0483 100644
--- a/tooling/nargo_cli/build.rs
+++ b/tooling/nargo_cli/build.rs
@@ -198,8 +198,8 @@ fn test_{test_name}(force_brillig: ForceBrillig, inliner_aggressiveness: Inliner
         nargo.arg("--force-brillig");
 
         // Set the maximum increase so that part of the optimization is exercised (it might fail).
-        // nargo.arg("--max-bytecode-increase-percent");
-        // nargo.arg("50");
+        nargo.arg("--max-bytecode-increase-percent");
+        nargo.arg("50");
     }}
 
     {test_content}

From a739ae042e85df23326087def9badf678be9a84b Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Fri, 31 Jan 2025 14:55:26 +0000
Subject: [PATCH 6/9] dont use none for bytecode size increase

---
 compiler/noirc_driver/src/lib.rs                  |  4 ++--
 compiler/noirc_evaluator/src/ssa.rs               |  2 +-
 compiler/noirc_evaluator/src/ssa/opt/hint.rs      |  2 +-
 compiler/noirc_evaluator/src/ssa/opt/unrolling.rs | 12 ++++++------
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/compiler/noirc_driver/src/lib.rs b/compiler/noirc_driver/src/lib.rs
index 62186a0c9e8..685f6f89ac5 100644
--- a/compiler/noirc_driver/src/lib.rs
+++ b/compiler/noirc_driver/src/lib.rs
@@ -156,8 +156,8 @@ pub struct CompileOptions {
     /// as it required fewer SSA instructions.
     /// A higher value results in fewer jumps but a larger program.
     /// A lower value keeps the original program if it was smaller, even if it has more jumps.
-    #[arg(long, hide = true, allow_hyphen_values = true)]
-    pub max_bytecode_increase_percent: Option<i32>,
+    #[arg(long, hide = true, allow_hyphen_values = true, default_value_t = i32::MAX)]
+    pub max_bytecode_increase_percent: i32,
 
     /// Use pedantic ACVM solving, i.e. double-check some black-box function
     /// assumptions when solving.
diff --git a/compiler/noirc_evaluator/src/ssa.rs b/compiler/noirc_evaluator/src/ssa.rs
index abe50bf91e5..609bb03357f 100644
--- a/compiler/noirc_evaluator/src/ssa.rs
+++ b/compiler/noirc_evaluator/src/ssa.rs
@@ -77,7 +77,7 @@ pub struct SsaEvaluatorOptions {
     /// Maximum accepted percentage increase in the Brillig bytecode size after unrolling loops.
     /// When `None` the size increase check is skipped altogether and any decrease in the SSA
     /// instruction count is accepted.
-    pub max_bytecode_increase_percent: Option<i32>,
+    pub max_bytecode_increase_percent: i32,
 }
 
 pub(crate) struct ArtifactsAndWarnings(Artifacts, Vec<SsaReport>);
diff --git a/compiler/noirc_evaluator/src/ssa/opt/hint.rs b/compiler/noirc_evaluator/src/ssa/opt/hint.rs
index 4897799f371..db79a932954 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/hint.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/hint.rs
@@ -20,7 +20,7 @@ mod tests {
             skip_underconstrained_check: true,
             enable_brillig_constraints_check: false,
             inliner_aggressiveness: 0,
-            max_bytecode_increase_percent: None,
+            max_bytecode_increase_percent: i32::MAX,
         };
 
         let builder = SsaBuilder {
diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
index eb5fe6873f5..2e753e0249c 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
@@ -53,14 +53,14 @@ impl Ssa {
     #[tracing::instrument(level = "trace", skip(self))]
     pub(crate) fn unroll_loops_iteratively(
         mut self,
-        max_bytecode_increase_percent: Option<i32>,
+        max_bytecode_increase_percent: i32,
     ) -> Result<Ssa, RuntimeError> {
         for function in self.functions.values_mut() {
             let is_brillig = function.runtime().is_brillig();
 
             // Take a snapshot in case we have to restore it.
             let orig_function =
-                (max_bytecode_increase_percent.is_some() && is_brillig).then(|| function.clone());
+                (max_bytecode_increase_percent == i32::MAX && is_brillig).then(|| function.clone());
 
             // We must be able to unroll ACIR loops at this point, so exit on failure to unroll.
             let has_unrolled = function.unroll_loops_iteratively()?;
@@ -73,11 +73,11 @@ impl Ssa {
                 // TODO: As we unroll more loops, this is potentially going to lead to panics
                 // when we have a non-aggressiveness inliner as the compiler is not going to have yet resolved
                 // certain intrinsics which we expect to be entirely known at compile-time (e.g. DerivePedersenGenerators).
-                if let Some(max_incr_pct) = max_bytecode_increase_percent {
+                if max_bytecode_increase_percent < i32::MAX {
                     let orig_function = orig_function.expect("took snapshot to compare");
                     let new_size = function.num_instructions();
                     let orig_size = function.num_instructions();
-                    if !is_new_size_ok(orig_size, new_size, max_incr_pct) {
+                    if !is_new_size_ok(orig_size, new_size, max_bytecode_increase_percent) {
                         *function = orig_function;
                     }
                 }
@@ -1416,7 +1416,7 @@ mod tests {
     #[test]
     fn test_brillig_unroll_iteratively_respects_max_increase() {
         let ssa = brillig_unroll_test_case();
-        let ssa = ssa.unroll_loops_iteratively(Some(-90)).unwrap();
+        let ssa = ssa.unroll_loops_iteratively(-90).unwrap();
         // Check that it's still the original
         assert_normalized_ssa_equals(ssa, brillig_unroll_test_case().to_string().as_str());
     }
@@ -1424,7 +1424,7 @@ mod tests {
     #[test]
     fn test_brillig_unroll_iteratively_with_large_max_increase() {
         let ssa = brillig_unroll_test_case();
-        let ssa = ssa.unroll_loops_iteratively(Some(50)).unwrap();
+        let ssa = ssa.unroll_loops_iteratively(50).unwrap();
         // Check that it did the unroll
         assert_eq!(ssa.main().reachable_blocks().len(), 2, "The loop should be unrolled");
     }

From b0b0cca3972a690af66e517e90a42b7bf668bd97 Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Fri, 31 Jan 2025 09:57:22 -0500
Subject: [PATCH 7/9] Update compiler/noirc_evaluator/src/ssa/opt/unrolling.rs

---
 compiler/noirc_evaluator/src/ssa/opt/unrolling.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
index 2e753e0249c..4414a2af750 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
@@ -60,7 +60,7 @@ impl Ssa {
 
             // Take a snapshot in case we have to restore it.
             let orig_function =
-                (max_bytecode_increase_percent == i32::MAX && is_brillig).then(|| function.clone());
+                (max_bytecode_increase_percent < i32::MAX && is_brillig).then(|| function.clone());
 
             // We must be able to unroll ACIR loops at this point, so exit on failure to unroll.
             let has_unrolled = function.unroll_loops_iteratively()?;

From 9a393fdcdb8b31d58c7f617310bce51a8693dc9c Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Fri, 31 Jan 2025 14:57:46 +0000
Subject: [PATCH 8/9] default bytecode size increase of 0

---
 compiler/noirc_driver/src/lib.rs                  | 2 +-
 compiler/noirc_evaluator/src/ssa/opt/unrolling.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler/noirc_driver/src/lib.rs b/compiler/noirc_driver/src/lib.rs
index 685f6f89ac5..0f6fb7773b7 100644
--- a/compiler/noirc_driver/src/lib.rs
+++ b/compiler/noirc_driver/src/lib.rs
@@ -156,7 +156,7 @@ pub struct CompileOptions {
     /// as it required fewer SSA instructions.
     /// A higher value results in fewer jumps but a larger program.
     /// A lower value keeps the original program if it was smaller, even if it has more jumps.
-    #[arg(long, hide = true, allow_hyphen_values = true, default_value_t = i32::MAX)]
+    #[arg(long, hide = true, allow_hyphen_values = true, default_value_t = 0)]
     pub max_bytecode_increase_percent: i32,
 
     /// Use pedantic ACVM solving, i.e. double-check some black-box function
diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
index 2e753e0249c..4414a2af750 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs
@@ -60,7 +60,7 @@ impl Ssa {
 
             // Take a snapshot in case we have to restore it.
             let orig_function =
-                (max_bytecode_increase_percent == i32::MAX && is_brillig).then(|| function.clone());
+                (max_bytecode_increase_percent < i32::MAX && is_brillig).then(|| function.clone());
 
             // We must be able to unroll ACIR loops at this point, so exit on failure to unroll.
             let has_unrolled = function.unroll_loops_iteratively()?;

From c4f767c9aa0d4893f204a3bb5577701fad95b4f9 Mon Sep 17 00:00:00 2001
From: Maxim Vezenov <mvezenov@gmail.com>
Date: Fri, 31 Jan 2025 20:00:45 +0000
Subject: [PATCH 9/9] empty commit