From 3758610c77d235335077bc706ed09945bc75d565 Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Wed, 29 Jan 2025 22:15:20 +0000 Subject: [PATCH 1/9] initial logic for attributing more useless instructions to unroll more loops --- .../src/ssa/opt/loop_invariant.rs | 7 +- .../noirc_evaluator/src/ssa/opt/unrolling.rs | 173 ++++++++++++++++-- 2 files changed, 160 insertions(+), 20 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/opt/loop_invariant.rs b/compiler/noirc_evaluator/src/ssa/opt/loop_invariant.rs index 1e2e783d516..169729a0f12 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/loop_invariant.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/loop_invariant.rs @@ -60,6 +60,11 @@ impl Loops { context.map_dependent_instructions(); context.inserter.map_data_bus_in_place(); + + // let new_values = context.loop_invariants.iter().map(|value| context.inserter.resolve(*value)).collect::>(); + // dbg!(new_values.clone()); + // dbg!(context.defined_in_loop.clone()); + // dbg!(context.loop_invariants.clone()); } } @@ -78,7 +83,7 @@ impl Loop { /// jmpif v5 then: b3, else: b2 /// ``` /// In the example above, `v1` is the induction variable - fn get_induction_variable(&self, function: &Function) -> ValueId { + pub(crate) fn get_induction_variable(&self, function: &Function) -> ValueId { function.dfg.block_parameters(self.header)[0] } } diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs index eb0bbd8c532..ea1d803cbc1 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs @@ -21,7 +21,6 @@ use std::collections::BTreeSet; use acvm::{acir::AcirField, FieldElement}; -use im::HashSet; use crate::{ brillig::{ @@ -45,7 +44,7 @@ use crate::{ ssa_gen::Ssa, }, }; -use fxhash::FxHashMap as HashMap; +use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet}; impl Ssa { /// Loop unrolling can return errors, since ACIR functions need to be fully unrolled. @@ -77,6 +76,7 @@ impl Ssa { // more finessing to convince the borrow checker that it's okay to share a read-only reference // to the globals and a mutable reference to the function at the same time, both part of the `Ssa`. if has_unrolled && is_brillig { + dbg!(max_bytecode_increase_percent.clone()); if let Some(max_incr_pct) = max_bytecode_increase_percent { if global_cache.is_none() { let globals = (*function.dfg.globals).clone(); @@ -159,10 +159,10 @@ pub(super) struct Loop { pub(super) struct Loops { /// The loops that failed to be unrolled so that we do not try to unroll them again. /// Each loop is identified by its header block id. - failed_to_unroll: HashSet, + failed_to_unroll: im::HashSet, pub(super) yet_to_unroll: Vec, - modified_blocks: HashSet, + modified_blocks: im::HashSet, pub(super) cfg: ControlFlowGraph, } @@ -221,10 +221,12 @@ impl Loops { // their loop range. We will start popping loops from the back. loops.sort_by_key(|loop_| loop_.blocks.len()); + // dbg!(loops.clone()); + Self { - failed_to_unroll: HashSet::default(), + failed_to_unroll: im::HashSet::default(), yet_to_unroll: loops, - modified_blocks: HashSet::default(), + modified_blocks: im::HashSet::default(), cfg, } } @@ -587,7 +589,7 @@ impl Loop { &self, function: &Function, cfg: &ControlFlowGraph, - ) -> Option> { + ) -> Option> { // We need to traverse blocks from the pre-header up to the block entry point. let pre_header = self.get_pre_header(function, cfg).ok()?; let function_entry = function.entry_block(); @@ -618,10 +620,11 @@ impl Loop { fn count_loads_and_stores( &self, function: &Function, - refs: &HashSet, + refs: &im::HashSet, ) -> (usize, usize) { let mut loads = 0; let mut stores = 0; + // let mut for block in &self.blocks { for instruction in function.dfg[*block].instructions() { match &function.dfg[*instruction] { @@ -674,6 +677,103 @@ impl Loop { self.boilerplate_stats(function, cfg).map(|s| s.is_small()).unwrap_or_default() } + fn values_defined_in_loop(&self, function: &Function) -> HashSet { + let mut defined_in_loop = HashSet::default(); + for block in self.blocks.iter() { + let params = function.dfg.block_parameters(*block); + defined_in_loop.extend(params); + for instruction_id in function.dfg[*block].instructions() { + let results = function.dfg.instruction_results(*instruction_id); + defined_in_loop.extend(results); + } + } + defined_in_loop + } + + /// Given the values defined inside the loop, we should be able to determine which instructions + /// are "useless" inside of a loop. + /// Certain instructions are "useless" instruction when they have certain properties: + /// All values are either: + /// - Constant + /// - Induction variable + /// - Or not defined in the loop + fn redefine_useful_results( + &self, + function: &Function, + mut defined_in_loop: HashSet, + refs: &im::HashSet, + ) -> usize { + let mut useless_instructions = 0; + let induction_var = self.get_induction_variable(function); + let mut new_induction_var = induction_var; + for block in &self.blocks { + for instruction in function.dfg[*block].instructions() { + let results = function.dfg.instruction_results(*instruction); + let instruction = &function.dfg[*instruction]; + // TODO: unify how all these instructions are checked + match instruction { + Instruction::Load { address } if refs.contains(address) => { + if !defined_in_loop.contains(&address) { + defined_in_loop.remove(&results[0]); + } + } + Instruction::Store { value, .. } => { + if *value == new_induction_var || *value == induction_var { + dbg!("got here"); + return 0; + } + } + Instruction::ArrayGet { array, index } => { + let index_is_useless = induction_var == *index + || function.dfg.is_constant(*index) + || !defined_in_loop.contains(index); + if !defined_in_loop.contains(array) && index_is_useless { + defined_in_loop.remove(&results[0]); + useless_instructions += 1; + } + } + Instruction::ArraySet { array, index, value, mutable } => { + let index_is_useless = induction_var == *index + || function.dfg.is_constant(*index) + || !defined_in_loop.contains(index); + if !defined_in_loop.contains(array) + && index_is_useless + && !defined_in_loop.contains(value) + { + defined_in_loop.remove(&results[0]); + useless_instructions += 1; + } + } + Instruction::Binary(_) => { + // TODO: need to check each of these values better + // It is "useless" when: + // - We have a constant + // - It is an induction variable + // - It has not been defined in the loop + let mut is_useless = true; + instruction.for_each_value(|value| { + is_useless &= !defined_in_loop.contains(&value) + || value == induction_var + || function.dfg.is_constant(value); + }); + if is_useless { + defined_in_loop.remove(&results[0]); + useless_instructions += 1; + } + } + Instruction::Cast(value, _) => { + if *value == induction_var { + dbg!("got here"); + new_induction_var = results[0]; + } + } + _ => {} + } + } + } + useless_instructions + } + /// Collect boilerplate stats if we can figure out the upper and lower bounds of the loop, /// and the loop doesn't have multiple back-edges from breaks and continues. fn boilerplate_stats( @@ -686,16 +786,42 @@ impl Loop { let upper = upper.try_to_u64()?; let refs = self.find_pre_header_reference_values(function, cfg)?; + let induction_var = self.get_induction_variable(function); + let mut uses_induction_var_outside = false; + for block in self.blocks.iter() { + let successors = function.dfg[*block].successors(); + for successor in successors { + if !self.blocks.contains(&successor) { + for instruction in function.dfg[successor].instructions() { + let instruction = &function.dfg[*instruction]; + instruction.for_each_value(|value| { + if value == induction_var { + uses_induction_var_outside = true; + } + }); + } + } + } + } + + let defined_in_loop = self.values_defined_in_loop(function); + let useless_instructions = if uses_induction_var_outside { + 0 + } else { + self.redefine_useful_results(function, defined_in_loop, &refs) + }; + // let useless_instructions = self.redefine_useful_results(function, defined_in_loop, &refs); + dbg!(self.blocks.clone()); + dbg!(useless_instructions); let (loads, stores) = self.count_loads_and_stores(function, &refs); - let increments = self.count_induction_increments(function); let all_instructions = self.count_all_instructions(function); Some(BoilerplateStats { iterations: (upper - lower) as usize, loads, stores, - increments, all_instructions, + useless_instructions, }) } } @@ -725,11 +851,13 @@ struct BoilerplateStats { loads: usize, /// Number of stores into pre-header references in the loop. stores: usize, - /// Number of increments to the induction variable (might be duplicated). - increments: usize, /// Number of instructions in the loop, including boilerplate, /// but excluding the boilerplate which is outside the loop. all_instructions: usize, + /// "Useless instructions" + /// TODO: find a better term for this + /// We do not have a loop invariant, but something that becomes simpler upon unrolling + useless_instructions: usize, } impl BoilerplateStats { @@ -742,12 +870,15 @@ impl BoilerplateStats { /// Estimated number of _useful_ instructions, which is the ones in the loop /// minus all in-loop boilerplate. fn useful_instructions(&self) -> usize { - // Two jumps + plus the comparison with the upper bound - let boilerplate = 3; + // Two jumps + // The comparison with the upper bound and the increments to the induction variable + // are included in the `useless_instructions` + let boilerplate = 2; // Be conservative and only assume that mem2reg gets rid of load followed by store. // NB we have not checked that these are actual pairs. let load_and_store = self.loads.min(self.stores) * 2; - self.all_instructions - self.increments - load_and_store - boilerplate + self.all_instructions - load_and_store - boilerplate - self.useless_instructions + // self.all_instructions - self.increments - load_and_store - boilerplate } /// Estimated number of instructions if we unroll the loop. @@ -760,6 +891,10 @@ impl BoilerplateStats { /// the blocks in tact with all the boilerplate involved in jumping, and the extra /// reference access instructions. fn is_small(&self) -> bool { + // dbg!(self.useful_instructions()); + // dbg!(self.iterations); + // dbg!(self.unrolled_instructions()); + // dbg!(self.baseline_instructions()); self.unrolled_instructions() < self.baseline_instructions() } } @@ -818,7 +953,7 @@ struct LoopIteration<'f> { /// Maps unrolled block ids back to the original source block ids original_blocks: HashMap, - visited_blocks: HashSet, + visited_blocks: im::HashSet, insert_block: BasicBlockId, source_block: BasicBlockId, @@ -844,7 +979,7 @@ impl<'f> LoopIteration<'f> { source_block, blocks: HashMap::default(), original_blocks: HashMap::default(), - visited_blocks: HashSet::default(), + visited_blocks: im::HashSet::default(), induction_value: None, } } @@ -1211,7 +1346,7 @@ mod tests { let stats = loop0_stats(&ssa); assert_eq!(stats.iterations, 4); assert_eq!(stats.all_instructions, 2 + 5); // Instructions in b1 and b3 - assert_eq!(stats.increments, 1); + // assert_eq!(stats.increments, 1); assert_eq!(stats.loads, 1); assert_eq!(stats.stores, 1); assert_eq!(stats.useful_instructions(), 1); // Adding to sum @@ -1225,7 +1360,7 @@ mod tests { let stats = loop0_stats(&ssa); assert_eq!(stats.iterations, 3); assert_eq!(stats.all_instructions, 2 + 8); // Instructions in b1 and b3 - assert_eq!(stats.increments, 2); + // assert_eq!(stats.increments, 2); assert_eq!(stats.loads, 1); assert_eq!(stats.stores, 1); assert_eq!(stats.useful_instructions(), 3); // array get, add, array set From c779b6cfc0ead5e5d2aa5bcbb3e65f02002af1f2 Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Thu, 30 Jan 2025 13:21:16 +0000 Subject: [PATCH 2/9] initial brillig more aggressiveness loop unrolling work,still a draft and needs tests --- compiler/noirc_evaluator/src/ssa.rs | 6 +- .../noirc_evaluator/src/ssa/opt/unrolling.rs | 81 ++++++++----------- .../global_var_regression_simple/src/main.nr | 24 +++--- .../unroll_loop_regression/Nargo.toml | 6 ++ .../unroll_loop_regression/Prover.toml | 1 + .../unroll_loop_regression/src/main.nr | 27 +++++++ tooling/nargo_cli/build.rs | 4 +- 7 files changed, 85 insertions(+), 64 deletions(-) create mode 100644 test_programs/execution_success/unroll_loop_regression/Nargo.toml create mode 100644 test_programs/execution_success/unroll_loop_regression/Prover.toml create mode 100644 test_programs/execution_success/unroll_loop_regression/src/main.nr diff --git a/compiler/noirc_evaluator/src/ssa.rs b/compiler/noirc_evaluator/src/ssa.rs index eb93ef47ea3..b0cfb5dd023 100644 --- a/compiler/noirc_evaluator/src/ssa.rs +++ b/compiler/noirc_evaluator/src/ssa.rs @@ -174,7 +174,11 @@ fn optimize_all(builder: SsaBuilder, options: &SsaEvaluatorOptions) -> Result usize { - let back = &function.dfg[self.back_edge_start]; - let header = &function.dfg[self.header]; - let induction_var = header.parameters()[0]; - - back.instructions() - .iter() - .filter(|instruction| { - let instruction = &function.dfg[**instruction]; - matches!(instruction, - Instruction::Binary(Binary { lhs, operator: BinaryOp::Add { .. }, rhs: _ }) - if *lhs == induction_var - ) - }) - .count() - } + // fn count_induction_increments(&self, function: &Function) -> usize { + // let back = &function.dfg[self.back_edge_start]; + // let header = &function.dfg[self.header]; + // let induction_var = header.parameters()[0]; + // back.instructions() + // .iter() + // .filter(|instruction| { + // let instruction = &function.dfg[**instruction]; + // matches!(instruction, + // Instruction::Binary(Binary { lhs, operator: BinaryOp::Add { .. }, rhs: _ }) + // if *lhs == induction_var + // ) + // }) + // .count() + // } /// Decide if this loop is small enough that it can be inlined in a way that the number /// of unrolled instructions times the number of iterations would result in smaller bytecode @@ -678,6 +678,8 @@ impl Loop { self.boilerplate_stats(function, cfg).map(|s| s.is_small()).unwrap_or_default() } + // TODO: unify this method with the one in the loop invariant pass + // probably can unify some other code too fn values_defined_in_loop(&self, function: &Function) -> HashSet { let mut defined_in_loop = HashSet::default(); for block in self.blocks.iter() { @@ -706,24 +708,17 @@ impl Loop { ) -> usize { let mut useless_instructions = 0; let induction_var = self.get_induction_variable(function); - let mut new_induction_var = induction_var; for block in &self.blocks { for instruction in function.dfg[*block].instructions() { let results = function.dfg.instruction_results(*instruction); let instruction = &function.dfg[*instruction]; - // TODO: unify how all these instructions are checked + // TODO: unify how all these instructions are analyzed match instruction { Instruction::Load { address } if refs.contains(address) => { - if !defined_in_loop.contains(&address) { + if !defined_in_loop.contains(address) { defined_in_loop.remove(&results[0]); } } - Instruction::Store { value, .. } => { - if *value == new_induction_var || *value == induction_var { - dbg!("got here"); - return 0; - } - } Instruction::ArrayGet { array, index } => { let index_is_useless = induction_var == *index || function.dfg.is_constant(*index) @@ -733,7 +728,7 @@ impl Loop { useless_instructions += 1; } } - Instruction::ArraySet { array, index, value, mutable } => { + Instruction::ArraySet { array, index, value, .. } => { let index_is_useless = induction_var == *index || function.dfg.is_constant(*index) || !defined_in_loop.contains(index); @@ -746,11 +741,6 @@ impl Loop { } } Instruction::Binary(_) => { - // TODO: need to check each of these values better - // It is "useless" when: - // - We have a constant - // - It is an induction variable - // - It has not been defined in the loop let mut is_useless = true; instruction.for_each_value(|value| { is_useless &= !defined_in_loop.contains(&value) @@ -762,12 +752,6 @@ impl Loop { useless_instructions += 1; } } - Instruction::Cast(value, _) => { - if *value == induction_var { - dbg!("got here"); - new_induction_var = results[0]; - } - } _ => {} } } @@ -787,6 +771,10 @@ impl Loop { let upper = upper.try_to_u64()?; let refs = self.find_pre_header_reference_values(function, cfg)?; + // If we have a break block, we can potentially directly use the induction variable in that break. + // If we then unroll the loop, the induction variable will not exist anymore. + // TODO: we should appropriately unroll and account for the break + // TODO 2: move this logic out into its own method let induction_var = self.get_induction_variable(function); let mut uses_induction_var_outside = false; for block in self.blocks.iter() { @@ -806,14 +794,13 @@ impl Loop { } let defined_in_loop = self.values_defined_in_loop(function); + let useless_instructions = if uses_induction_var_outside { 0 } else { self.redefine_useful_results(function, defined_in_loop, &refs) }; - // let useless_instructions = self.redefine_useful_results(function, defined_in_loop, &refs); - dbg!(self.blocks.clone()); - dbg!(useless_instructions); + let (loads, stores) = self.count_loads_and_stores(function, &refs); let all_instructions = self.count_all_instructions(function); @@ -871,7 +858,8 @@ impl BoilerplateStats { /// Estimated number of _useful_ instructions, which is the ones in the loop /// minus all in-loop boilerplate. fn useful_instructions(&self) -> usize { - // Two jumps + // Boilerplate only includes two jumps for induction variable comparison + // and the back edge jump to the header. // The comparison with the upper bound and the increments to the induction variable // are included in the `useless_instructions` let boilerplate = 2; @@ -879,7 +867,6 @@ impl BoilerplateStats { // NB we have not checked that these are actual pairs. let load_and_store = self.loads.min(self.stores) * 2; self.all_instructions - load_and_store - boilerplate - self.useless_instructions - // self.all_instructions - self.increments - load_and_store - boilerplate } /// Estimated number of instructions if we unroll the loop. @@ -892,10 +879,6 @@ impl BoilerplateStats { /// the blocks in tact with all the boilerplate involved in jumping, and the extra /// reference access instructions. fn is_small(&self) -> bool { - // dbg!(self.useful_instructions()); - // dbg!(self.iterations); - // dbg!(self.unrolled_instructions()); - // dbg!(self.baseline_instructions()); self.unrolled_instructions() < self.baseline_instructions() } } diff --git a/test_programs/execution_success/global_var_regression_simple/src/main.nr b/test_programs/execution_success/global_var_regression_simple/src/main.nr index b1bf753a73c..c437f5f8db4 100644 --- a/test_programs/execution_success/global_var_regression_simple/src/main.nr +++ b/test_programs/execution_success/global_var_regression_simple/src/main.nr @@ -7,19 +7,19 @@ fn main(x: Field, y: pub Field) { acc += EXPONENTIATE[i][j]; } } - assert(!acc.lt(x)); + // assert(!acc.lt(x)); assert(x != y); - dummy_again(x, y); + // dummy_again(x, y); } -fn dummy_again(x: Field, y: Field) { - let mut acc: Field = 0; - for i in 0..2 { - for j in 0..2 { - acc += EXPONENTIATE[i][j]; - } - } - assert(!acc.lt(x)); - assert(x != y); -} +// fn dummy_again(x: Field, y: Field) { +// let mut acc: Field = 0; +// for i in 0..2 { +// for j in 0..2 { +// acc += EXPONENTIATE[i][j]; +// } +// } +// assert(!acc.lt(x)); +// assert(x != y); +// } diff --git a/test_programs/execution_success/unroll_loop_regression/Nargo.toml b/test_programs/execution_success/unroll_loop_regression/Nargo.toml new file mode 100644 index 00000000000..1f055d9de5c --- /dev/null +++ b/test_programs/execution_success/unroll_loop_regression/Nargo.toml @@ -0,0 +1,6 @@ +[package] +name = "unroll_loop_regression" +type = "bin" +authors = [""] + +[dependencies] \ No newline at end of file diff --git a/test_programs/execution_success/unroll_loop_regression/Prover.toml b/test_programs/execution_success/unroll_loop_regression/Prover.toml new file mode 100644 index 00000000000..5ca6e3c8812 --- /dev/null +++ b/test_programs/execution_success/unroll_loop_regression/Prover.toml @@ -0,0 +1 @@ +fracs = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \ No newline at end of file diff --git a/test_programs/execution_success/unroll_loop_regression/src/main.nr b/test_programs/execution_success/unroll_loop_regression/src/main.nr new file mode 100644 index 00000000000..294eabfee63 --- /dev/null +++ b/test_programs/execution_success/unroll_loop_regression/src/main.nr @@ -0,0 +1,27 @@ +global ROOTS: [Field; 16] = [1, 2, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, 15, 16]; + +unconstrained fn main( + fracs: [Field; 16], +) -> pub [Field; 2] { + let mut partial_sums: [Field; 2] = std::mem::zeroed(); + + let mut partial_sum: Field = 0; + for i in 0..8 { + let summand = ROOTS[i] * fracs[i]; + partial_sum = partial_sum + summand; + } + partial_sums[0] = partial_sum; + + let NUM_PARTIAL_SUMS = 2; + for i in 1..NUM_PARTIAL_SUMS { + let mut partial_sum: Field = partial_sums[i - 1]; + for j in 0..8 { + let k = i * 8 + j; + let summand = ROOTS[k]* fracs[k]; + partial_sum = partial_sum + summand; + } + partial_sums[i] = partial_sum; + } + + partial_sums +} \ No newline at end of file diff --git a/tooling/nargo_cli/build.rs b/tooling/nargo_cli/build.rs index 5e101bc0483..e6559fb5bc0 100644 --- a/tooling/nargo_cli/build.rs +++ b/tooling/nargo_cli/build.rs @@ -198,8 +198,8 @@ fn test_{test_name}(force_brillig: ForceBrillig, inliner_aggressiveness: Inliner nargo.arg("--force-brillig"); // Set the maximum increase so that part of the optimization is exercised (it might fail). - nargo.arg("--max-bytecode-increase-percent"); - nargo.arg("50"); + // nargo.arg("--max-bytecode-increase-percent"); + // nargo.arg("50"); }} {test_content} From f21ee1ae92a9539db400d0a0a210ee94a9013b85 Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Thu, 30 Jan 2025 13:24:55 +0000 Subject: [PATCH 3/9] restore global_var_regression_simple test --- .../global_var_regression_simple/src/main.nr | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test_programs/execution_success/global_var_regression_simple/src/main.nr b/test_programs/execution_success/global_var_regression_simple/src/main.nr index c437f5f8db4..b1bf753a73c 100644 --- a/test_programs/execution_success/global_var_regression_simple/src/main.nr +++ b/test_programs/execution_success/global_var_regression_simple/src/main.nr @@ -7,19 +7,19 @@ fn main(x: Field, y: pub Field) { acc += EXPONENTIATE[i][j]; } } - // assert(!acc.lt(x)); + assert(!acc.lt(x)); assert(x != y); - // dummy_again(x, y); + dummy_again(x, y); } -// fn dummy_again(x: Field, y: Field) { -// let mut acc: Field = 0; -// for i in 0..2 { -// for j in 0..2 { -// acc += EXPONENTIATE[i][j]; -// } -// } -// assert(!acc.lt(x)); -// assert(x != y); -// } +fn dummy_again(x: Field, y: Field) { + let mut acc: Field = 0; + for i in 0..2 { + for j in 0..2 { + acc += EXPONENTIATE[i][j]; + } + } + assert(!acc.lt(x)); + assert(x != y); +} From 892c528f33548d6dce55d0ec236277a94ea150b0 Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Thu, 30 Jan 2025 13:30:04 +0000 Subject: [PATCH 4/9] nargo fmt --- .../unroll_loop_regression/src/main.nr | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test_programs/execution_success/unroll_loop_regression/src/main.nr b/test_programs/execution_success/unroll_loop_regression/src/main.nr index 294eabfee63..e0dfe23c087 100644 --- a/test_programs/execution_success/unroll_loop_regression/src/main.nr +++ b/test_programs/execution_success/unroll_loop_regression/src/main.nr @@ -1,8 +1,6 @@ -global ROOTS: [Field; 16] = [1, 2, 3, 4, 5, 6, 7,8, 9, 10, 11, 12, 13, 14, 15, 16]; +global ROOTS: [Field; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; -unconstrained fn main( - fracs: [Field; 16], -) -> pub [Field; 2] { +unconstrained fn main(fracs: [Field; 16]) -> pub [Field; 2] { let mut partial_sums: [Field; 2] = std::mem::zeroed(); let mut partial_sum: Field = 0; @@ -17,11 +15,11 @@ unconstrained fn main( let mut partial_sum: Field = partial_sums[i - 1]; for j in 0..8 { let k = i * 8 + j; - let summand = ROOTS[k]* fracs[k]; + let summand = ROOTS[k] * fracs[k]; partial_sum = partial_sum + summand; } partial_sums[i] = partial_sum; } partial_sums -} \ No newline at end of file +} From 735064038dec2a71473b6dca90fb316382d3e308 Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Thu, 30 Jan 2025 19:05:43 +0000 Subject: [PATCH 5/9] use num ssa instructions for bytecode size increase --- .../noirc_evaluator/src/ssa/ir/function.rs | 10 +++++ .../noirc_evaluator/src/ssa/opt/unrolling.rs | 40 ++----------------- tooling/nargo_cli/build.rs | 4 +- 3 files changed, 15 insertions(+), 39 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/ir/function.rs b/compiler/noirc_evaluator/src/ssa/ir/function.rs index 6a0659e81bf..e7748b5f13f 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/function.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/function.rs @@ -211,6 +211,16 @@ impl Function { unreachable!("SSA Function {} has no reachable return instruction!", self.id()) } + + pub(crate) fn num_instructions(&self) -> usize { + self.reachable_blocks() + .iter() + .map(|block| { + let block = &self.dfg[*block]; + block.instructions().len() + block.terminator().is_some() as usize + }) + .sum() + } } impl Clone for Function { diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs index 3f358b8d9b1..430cddab1e4 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs @@ -23,10 +23,6 @@ use std::collections::BTreeSet; use acvm::{acir::AcirField, FieldElement}; use crate::{ - brillig::{ - brillig_gen::{brillig_globals::convert_ssa_globals, convert_ssa_function}, - brillig_ir::brillig_variable::BrilligVariable, - }, errors::RuntimeError, ssa::{ ir::{ @@ -59,7 +55,7 @@ impl Ssa { mut self, max_bytecode_increase_percent: Option, ) -> Result { - let mut global_cache = None; + // let mut global_cache = None; for function in self.functions.values_mut() { let is_brillig = function.runtime().is_brillig(); @@ -80,20 +76,9 @@ impl Ssa { // when we have a non-aggressiveness inliner as the compiler is not going to have yet resolved // certain intrinsics which we expect to be entirely known at compile-time (e.g. DerivePedersenGenerators). if let Some(max_incr_pct) = max_bytecode_increase_percent { - if global_cache.is_none() { - let globals = (*function.dfg.globals).clone(); - let used_globals = &globals.values_iter().map(|(id, _)| id).collect(); - let globals_dfg = DataFlowGraph::from(globals); - // DIE is run at the end of our SSA optimizations, so we mark all globals as in use here. - let (_, brillig_globals, _) = - convert_ssa_globals(false, &globals_dfg, used_globals, function.id()); - global_cache = Some(brillig_globals); - } - let brillig_globals = global_cache.as_ref().unwrap(); - let orig_function = orig_function.expect("took snapshot to compare"); - let new_size = brillig_bytecode_size(function, brillig_globals); - let orig_size = brillig_bytecode_size(&orig_function, brillig_globals); + let new_size = function.num_instructions(); + let orig_size = function.num_instructions(); if !is_new_size_ok(orig_size, new_size, max_incr_pct) { *function = orig_function; } @@ -1140,25 +1125,6 @@ fn simplify_between_unrolls(function: &mut Function) { function.mem2reg(); } -/// Convert the function to Brillig bytecode and return the resulting size. -fn brillig_bytecode_size( - function: &Function, - globals: &HashMap, -) -> usize { - // We need to do some SSA passes in order for the conversion to be able to go ahead, - // otherwise we can hit `unreachable!()` instructions in `convert_ssa_instruction`. - // Creating a clone so as not to modify the originals. - let mut temp = function.clone(); - - // Might as well give it the best chance. - simplify_between_unrolls(&mut temp); - - // This is to try to prevent hitting ICE. - temp.dead_instruction_elimination(false, true); - - convert_ssa_function(&temp, false, globals).byte_code.len() -} - /// Decide if the new bytecode size is acceptable, compared to the original. /// /// The maximum increase can be expressed as a negative value if we demand a decrease. diff --git a/tooling/nargo_cli/build.rs b/tooling/nargo_cli/build.rs index e6559fb5bc0..5e101bc0483 100644 --- a/tooling/nargo_cli/build.rs +++ b/tooling/nargo_cli/build.rs @@ -198,8 +198,8 @@ fn test_{test_name}(force_brillig: ForceBrillig, inliner_aggressiveness: Inliner nargo.arg("--force-brillig"); // Set the maximum increase so that part of the optimization is exercised (it might fail). - // nargo.arg("--max-bytecode-increase-percent"); - // nargo.arg("50"); + nargo.arg("--max-bytecode-increase-percent"); + nargo.arg("50"); }} {test_content} From a739ae042e85df23326087def9badf678be9a84b Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Fri, 31 Jan 2025 14:55:26 +0000 Subject: [PATCH 6/9] dont use none for bytecode size increase --- compiler/noirc_driver/src/lib.rs | 4 ++-- compiler/noirc_evaluator/src/ssa.rs | 2 +- compiler/noirc_evaluator/src/ssa/opt/hint.rs | 2 +- compiler/noirc_evaluator/src/ssa/opt/unrolling.rs | 12 ++++++------ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/compiler/noirc_driver/src/lib.rs b/compiler/noirc_driver/src/lib.rs index 62186a0c9e8..685f6f89ac5 100644 --- a/compiler/noirc_driver/src/lib.rs +++ b/compiler/noirc_driver/src/lib.rs @@ -156,8 +156,8 @@ pub struct CompileOptions { /// as it required fewer SSA instructions. /// A higher value results in fewer jumps but a larger program. /// A lower value keeps the original program if it was smaller, even if it has more jumps. - #[arg(long, hide = true, allow_hyphen_values = true)] - pub max_bytecode_increase_percent: Option, + #[arg(long, hide = true, allow_hyphen_values = true, default_value_t = i32::MAX)] + pub max_bytecode_increase_percent: i32, /// Use pedantic ACVM solving, i.e. double-check some black-box function /// assumptions when solving. diff --git a/compiler/noirc_evaluator/src/ssa.rs b/compiler/noirc_evaluator/src/ssa.rs index abe50bf91e5..609bb03357f 100644 --- a/compiler/noirc_evaluator/src/ssa.rs +++ b/compiler/noirc_evaluator/src/ssa.rs @@ -77,7 +77,7 @@ pub struct SsaEvaluatorOptions { /// Maximum accepted percentage increase in the Brillig bytecode size after unrolling loops. /// When `None` the size increase check is skipped altogether and any decrease in the SSA /// instruction count is accepted. - pub max_bytecode_increase_percent: Option, + pub max_bytecode_increase_percent: i32, } pub(crate) struct ArtifactsAndWarnings(Artifacts, Vec); diff --git a/compiler/noirc_evaluator/src/ssa/opt/hint.rs b/compiler/noirc_evaluator/src/ssa/opt/hint.rs index 4897799f371..db79a932954 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/hint.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/hint.rs @@ -20,7 +20,7 @@ mod tests { skip_underconstrained_check: true, enable_brillig_constraints_check: false, inliner_aggressiveness: 0, - max_bytecode_increase_percent: None, + max_bytecode_increase_percent: i32::MAX, }; let builder = SsaBuilder { diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs index eb5fe6873f5..2e753e0249c 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs @@ -53,14 +53,14 @@ impl Ssa { #[tracing::instrument(level = "trace", skip(self))] pub(crate) fn unroll_loops_iteratively( mut self, - max_bytecode_increase_percent: Option, + max_bytecode_increase_percent: i32, ) -> Result { for function in self.functions.values_mut() { let is_brillig = function.runtime().is_brillig(); // Take a snapshot in case we have to restore it. let orig_function = - (max_bytecode_increase_percent.is_some() && is_brillig).then(|| function.clone()); + (max_bytecode_increase_percent == i32::MAX && is_brillig).then(|| function.clone()); // We must be able to unroll ACIR loops at this point, so exit on failure to unroll. let has_unrolled = function.unroll_loops_iteratively()?; @@ -73,11 +73,11 @@ impl Ssa { // TODO: As we unroll more loops, this is potentially going to lead to panics // when we have a non-aggressiveness inliner as the compiler is not going to have yet resolved // certain intrinsics which we expect to be entirely known at compile-time (e.g. DerivePedersenGenerators). - if let Some(max_incr_pct) = max_bytecode_increase_percent { + if max_bytecode_increase_percent < i32::MAX { let orig_function = orig_function.expect("took snapshot to compare"); let new_size = function.num_instructions(); let orig_size = function.num_instructions(); - if !is_new_size_ok(orig_size, new_size, max_incr_pct) { + if !is_new_size_ok(orig_size, new_size, max_bytecode_increase_percent) { *function = orig_function; } } @@ -1416,7 +1416,7 @@ mod tests { #[test] fn test_brillig_unroll_iteratively_respects_max_increase() { let ssa = brillig_unroll_test_case(); - let ssa = ssa.unroll_loops_iteratively(Some(-90)).unwrap(); + let ssa = ssa.unroll_loops_iteratively(-90).unwrap(); // Check that it's still the original assert_normalized_ssa_equals(ssa, brillig_unroll_test_case().to_string().as_str()); } @@ -1424,7 +1424,7 @@ mod tests { #[test] fn test_brillig_unroll_iteratively_with_large_max_increase() { let ssa = brillig_unroll_test_case(); - let ssa = ssa.unroll_loops_iteratively(Some(50)).unwrap(); + let ssa = ssa.unroll_loops_iteratively(50).unwrap(); // Check that it did the unroll assert_eq!(ssa.main().reachable_blocks().len(), 2, "The loop should be unrolled"); } From b0b0cca3972a690af66e517e90a42b7bf668bd97 Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Fri, 31 Jan 2025 09:57:22 -0500 Subject: [PATCH 7/9] Update compiler/noirc_evaluator/src/ssa/opt/unrolling.rs --- compiler/noirc_evaluator/src/ssa/opt/unrolling.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs index 2e753e0249c..4414a2af750 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs @@ -60,7 +60,7 @@ impl Ssa { // Take a snapshot in case we have to restore it. let orig_function = - (max_bytecode_increase_percent == i32::MAX && is_brillig).then(|| function.clone()); + (max_bytecode_increase_percent < i32::MAX && is_brillig).then(|| function.clone()); // We must be able to unroll ACIR loops at this point, so exit on failure to unroll. let has_unrolled = function.unroll_loops_iteratively()?; From 9a393fdcdb8b31d58c7f617310bce51a8693dc9c Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Fri, 31 Jan 2025 14:57:46 +0000 Subject: [PATCH 8/9] default bytecode size increase of 0 --- compiler/noirc_driver/src/lib.rs | 2 +- compiler/noirc_evaluator/src/ssa/opt/unrolling.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/noirc_driver/src/lib.rs b/compiler/noirc_driver/src/lib.rs index 685f6f89ac5..0f6fb7773b7 100644 --- a/compiler/noirc_driver/src/lib.rs +++ b/compiler/noirc_driver/src/lib.rs @@ -156,7 +156,7 @@ pub struct CompileOptions { /// as it required fewer SSA instructions. /// A higher value results in fewer jumps but a larger program. /// A lower value keeps the original program if it was smaller, even if it has more jumps. - #[arg(long, hide = true, allow_hyphen_values = true, default_value_t = i32::MAX)] + #[arg(long, hide = true, allow_hyphen_values = true, default_value_t = 0)] pub max_bytecode_increase_percent: i32, /// Use pedantic ACVM solving, i.e. double-check some black-box function diff --git a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs index 2e753e0249c..4414a2af750 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/unrolling.rs @@ -60,7 +60,7 @@ impl Ssa { // Take a snapshot in case we have to restore it. let orig_function = - (max_bytecode_increase_percent == i32::MAX && is_brillig).then(|| function.clone()); + (max_bytecode_increase_percent < i32::MAX && is_brillig).then(|| function.clone()); // We must be able to unroll ACIR loops at this point, so exit on failure to unroll. let has_unrolled = function.unroll_loops_iteratively()?; From c4f767c9aa0d4893f204a3bb5577701fad95b4f9 Mon Sep 17 00:00:00 2001 From: Maxim Vezenov Date: Fri, 31 Jan 2025 20:00:45 +0000 Subject: [PATCH 9/9] empty commit