From 49c59766bfdfc895321739427a6846f0f464bd04 Mon Sep 17 00:00:00 2001 From: "ryan.friese@pnnl.gov" Date: Wed, 7 Aug 2024 15:50:26 -0700 Subject: [PATCH] fix sub array block distribution calculations --- lamellar_run.sh | 2 +- src/array.rs | 8 +- src/array/unsafe.rs | 99 +++++++++++-------- tests/add.rs | 1 + tests/array/arithmetic_ops/add_test.rs | 132 ++++++++++++------------- 5 files changed, 132 insertions(+), 110 deletions(-) diff --git a/lamellar_run.sh b/lamellar_run.sh index 5f3af138..b099d862 100755 --- a/lamellar_run.sh +++ b/lamellar_run.sh @@ -34,7 +34,7 @@ for pe in $(seq 0 $ENDPE); do echo "more threads ${E_CORE} than cores ${NPROC} " exit fi - LAMELLAR_BACKEND="shmem" LAMELLAR_MEM_SIZE=$((1*1024*1024*1024)) LAMELLAR_THREADS=$((THREADS)) LAMELLAR_NUM_PES=$NUMPES LAMELLAR_PE_ID=$pe LAMELLAR_JOB_ID=$JOBID $bin "${@:2}" &>> ${pe}_out.txt & + LAMELLAR_BACKEND="shmem" LAMELLAR_MEM_SIZE=$((1*1024*1024*1024)) LAMELLAR_THREADS=$((THREADS)) LAMELLAR_NUM_PES=$NUMPES LAMELLAR_PE_ID=$pe LAMELLAR_JOB_ID=$JOBID $bin "${@:2}" & S_CORE=$(($E_CORE )) E_CORE=$(($S_CORE + $THREADS)) done diff --git a/src/array.rs b/src/array.rs index 8add8161..48662d97 100644 --- a/src/array.rs +++ b/src/array.rs @@ -194,10 +194,10 @@ crate::inventory::collect!(ReduceKey); // lamellar_impl::generate_reductions_for_type_rt!(true, u8, usize); // lamellar_impl::generate_ops_for_type_rt!(true, true, true, u8, usize); -// lamellar_impl::generate_reductions_for_type_rt!(false, f32); -// lamellar_impl::generate_ops_for_type_rt!(false, false, false, f32); -// lamellar_impl::generate_reductions_for_type_rt!(false, u128); -// lamellar_impl::generate_ops_for_type_rt!(true, false, true, u128); +// lamellar_impl::generate_reductions_for_type_rt!(false, f64); +// lamellar_impl::generate_ops_for_type_rt!(false, false, false, f64); +lamellar_impl::generate_reductions_for_type_rt!(false, u128); +lamellar_impl::generate_ops_for_type_rt!(true, false, true, u128); 
lamellar_impl::generate_reductions_for_type_rt!(true, u8, u16, u32, u64, usize); lamellar_impl::generate_reductions_for_type_rt!(false, u128); diff --git a/src/array/unsafe.rs b/src/array/unsafe.rs index a87a407f..16ac52bb 100644 --- a/src/array/unsafe.rs +++ b/src/array/unsafe.rs @@ -116,10 +116,10 @@ pub(crate) struct UnsafeArrayInnerWeak { pub(crate) data: WeakDarc, pub(crate) distribution: Distribution, orig_elem_per_pe: usize, - orig_remaining_elems: usize, - elem_size: usize, //for bytes array will be size of T, for T array will be 1 - offset: usize, //relative to size of T - size: usize, //relative to size of T + orig_remaining_elems: usize, // the number of elements that can't be evenly divided amongst all PES + elem_size: usize, //for bytes array will be size of T, for T array will be 1 + offset: usize, //relative to size of T + size: usize, //relative to size of T sub: bool, } @@ -1161,8 +1161,16 @@ impl LamellarArray for UnsafeArray { //#[tracing::instrument(skip_all)] fn pe_and_offset_for_global_index(&self, index: usize) -> Option<(usize, usize)> { if self.inner.sub { + // println!("sub array {index}"); let pe = self.inner.pe_for_dist_index(index)?; + // println!("pe: {pe}"); let offset = self.inner.pe_sub_offset_for_dist_index(pe, index)?; + // println!( + // "sub array index {index} pe {pe} offset {offset} size {} {} {}", + // self.inner.size, + // self.inner.num_elems_pe(0), + // self.inner.num_elems_pe(1) + // ); Some((pe, offset)) } else { self.inner.full_pe_and_offset_for_global_index(index) @@ -1222,7 +1230,14 @@ impl SubArray for UnsafeArray { start, end, self.inner.size ); } - // println!("new inner {:?} {:?} {:?} {:?}",start,end,end-start,self.sub_array_offset + start); + // println!( + // "new inner start {:?} end {:?} size {:?} cur offset {:?} cur size {:?}", + // start, + // end, + // end - start, + // self.inner.offset, + // self.inner.size + // ); let mut inner = self.inner.clone(); inner.offset += start; inner.size = end - start; @@ 
-1534,22 +1549,21 @@ impl UnsafeArrayInner { match self.distribution { Distribution::Block => { let rem_index = self.orig_remaining_elems * (self.orig_elem_per_pe + 1); - let mut elem_per_pe = self.orig_elem_per_pe; - if rem_index < self.size { - elem_per_pe += 1; - } else { - global_index = global_index - rem_index; - } + let (pe, offset) = if global_index < rem_index { - (global_index / elem_per_pe, global_index % elem_per_pe) + //index is on a pe with extra elems + let pe = global_index / (self.orig_elem_per_pe + 1); // accounts for the remaining elems + let offset = global_index - (pe * (self.orig_elem_per_pe + 1)); + (pe, offset) } else { - ( - rem_index / elem_per_pe - + (global_index - rem_index) / self.orig_elem_per_pe, - global_index % self.orig_elem_per_pe, - ) + //index is on a pe without extra elems + let temp_index = global_index - rem_index; //get the remaining index after accounting for PEs with extra elements + let temp_pe = temp_index / self.orig_elem_per_pe; //the pe after accounting for PEs with extra elements + let pe = self.orig_remaining_elems // N pes that have extra elements + + temp_pe; + let offset = temp_index - (temp_pe * self.orig_elem_per_pe); + (pe, offset) }; - Some((pe, offset)) } Distribution::Cyclic => { @@ -1568,21 +1582,19 @@ impl UnsafeArrayInner { //index is relative to (sub)array (i.e. 
index=0 doesnt necessarily live on pe=0) // //#[tracing::instrument(skip_all)] pub(crate) fn pe_for_dist_index(&self, index: usize) -> Option<usize> { + // println!("pe_for_dist_index {index} {}", self.size); if self.size > index { let mut global_index = index + self.offset; + match self.distribution { Distribution::Block => { let rem_index = self.orig_remaining_elems * (self.orig_elem_per_pe + 1); - let mut elem_per_pe = self.orig_elem_per_pe; - if rem_index < self.size { - elem_per_pe += 1; - } else { - global_index = global_index - rem_index; - } let pe = if global_index < rem_index { - global_index / elem_per_pe + global_index / (self.orig_elem_per_pe + 1) // accounts for the remaining elems } else { - rem_index / elem_per_pe + (global_index - rem_index) / self.orig_elem_per_pe + self.orig_remaining_elems // N pes that have extra elements + + ((global_index - rem_index) //get the remaining index after accounting for PEs with extra elements + / self.orig_elem_per_pe) }; Some(pe) } @@ -1596,21 +1608,21 @@ impl UnsafeArrayInner { //index relative to subarray, return offset relative to subarray // //#[tracing::instrument(skip_all)] pub(crate) fn pe_full_offset_for_dist_index(&self, pe: usize, index: usize) -> Option<usize> { + // println!("pe_full_offset_for_dist_index pe {pe} index {index}"); let mut global_index = self.offset + index; - match self.distribution { Distribution::Block => { let rem_index = self.orig_remaining_elems * (self.orig_elem_per_pe + 1); - let mut elem_per_pe = self.orig_elem_per_pe; - if rem_index < self.size { - elem_per_pe += 1; - } else { - global_index = global_index - rem_index; - } + // println!("\tindex: {index} offset {} size {} global_index {global_index} rem_index {rem_index}",self.offset, self.size); let offset = if global_index < rem_index { - global_index % elem_per_pe + //index is on a pe with extra elems + global_index - (pe * (self.orig_elem_per_pe + 1)) } else { - global_index % self.orig_elem_per_pe + //index is on a pe without extra elems 
+ let temp_index = global_index - rem_index; //get the remaining index after accounting for PEs with extra elements + let temp_pe = temp_index / self.orig_elem_per_pe; //the pe after accounting for PEs with extra elements + + temp_index - (temp_pe * self.orig_elem_per_pe) }; Some(offset) } @@ -1627,13 +1639,22 @@ impl UnsafeArrayInner { //index relative to subarray, return offset relative to subarray pub(crate) fn pe_sub_offset_for_dist_index(&self, pe: usize, index: usize) -> Option<usize> { - let offset = self.pe_full_offset_for_dist_index(pe, index)?; + // println!( + // "pe_sub_offset_for_dist_index index {index} pe {pe} offset {}", + // self.offset + // ); + let start_pe = self.pe_for_dist_index(0)?; + match self.distribution { Distribution::Block => { - if self.offset <= offset { - Some(offset - self.offset) + if start_pe == pe { + if index < self.size { + Some(index) + } else { + None + } } else { - None + self.pe_full_offset_for_dist_index(pe, index) } } Distribution::Cyclic => { diff --git a/tests/add.rs b/tests/add.rs index f185c5cd..598b916b 100644 --- a/tests/add.rs +++ b/tests/add.rs @@ -89,6 +89,7 @@ macro_rules! 
create_add_tests { create_add_tests!( (UnsafeArray, LocalLockArray, AtomicArray), // (UnsafeArray, AtomicArray, GenericAtomicArray, LocalLockArray), ("Block", "Cyclic"), + // (u8, f64), (u8, u16, u32, u128, usize, i8, i16, i32, i128, isize, f32, f64), (2, 3, 4), (4, 19, 128) diff --git a/tests/array/arithmetic_ops/add_test.rs b/tests/array/arithmetic_ops/add_test.rs index 64c016e2..c237d8e2 100644 --- a/tests/array/arithmetic_ops/add_test.rs +++ b/tests/array/arithmetic_ops/add_test.rs @@ -487,78 +487,78 @@ fn main() { }; match array.as_str() { - "UnsafeArray" => match elem.as_str() { - "u8" => add_test!(UnsafeArray, u8, len, dist_type), - "u16" => add_test!(UnsafeArray, u16, len, dist_type), - "u32" => add_test!(UnsafeArray, u32, len, dist_type), - "u64" => add_test!(UnsafeArray, u64, len, dist_type), - "u128" => add_test!(UnsafeArray, u128, len, dist_type), - "usize" => add_test!(UnsafeArray, usize, len, dist_type), - "i8" => add_test!(UnsafeArray, i8, len, dist_type), - "i16" => add_test!(UnsafeArray, i16, len, dist_type), - "i32" => add_test!(UnsafeArray, i32, len, dist_type), - "i64" => add_test!(UnsafeArray, i64, len, dist_type), - "i128" => add_test!(UnsafeArray, i128, len, dist_type), - "isize" => add_test!(UnsafeArray, isize, len, dist_type), - "f32" => add_test!(UnsafeArray, f32, len, dist_type), - "f64" => add_test!(UnsafeArray, f64, len, dist_type), - "input" => input_test!(UnsafeArray, len, dist_type), - _ => eprintln!("unsupported element type"), - }, + // "UnsafeArray" => match elem.as_str() { + // "u8" => add_test!(UnsafeArray, u8, len, dist_type), + // "u16" => add_test!(UnsafeArray, u16, len, dist_type), + // "u32" => add_test!(UnsafeArray, u32, len, dist_type), + // "u64" => add_test!(UnsafeArray, u64, len, dist_type), + // "u128" => add_test!(UnsafeArray, u128, len, dist_type), + // "usize" => add_test!(UnsafeArray, usize, len, dist_type), + // "i8" => add_test!(UnsafeArray, i8, len, dist_type), + // "i16" => add_test!(UnsafeArray, i16, len, 
dist_type), + // "i32" => add_test!(UnsafeArray, i32, len, dist_type), + // "i64" => add_test!(UnsafeArray, i64, len, dist_type), + // "i128" => add_test!(UnsafeArray, i128, len, dist_type), + // "isize" => add_test!(UnsafeArray, isize, len, dist_type), + // "f32" => add_test!(UnsafeArray, f32, len, dist_type), + // "f64" => add_test!(UnsafeArray, f64, len, dist_type), + // "input" => input_test!(UnsafeArray, len, dist_type), + // _ => eprintln!("unsupported element type"), + // }, "AtomicArray" => match elem.as_str() { "u8" => add_test!(AtomicArray, u8, len, dist_type), - "u16" => add_test!(AtomicArray, u16, len, dist_type), - "u32" => add_test!(AtomicArray, u32, len, dist_type), - "u64" => add_test!(AtomicArray, u64, len, dist_type), - "u128" => add_test!(AtomicArray, u128, len, dist_type), - "usize" => add_test!(AtomicArray, usize, len, dist_type), - "i8" => add_test!(AtomicArray, i8, len, dist_type), - "i16" => add_test!(AtomicArray, i16, len, dist_type), - "i32" => add_test!(AtomicArray, i32, len, dist_type), - "i64" => add_test!(AtomicArray, i64, len, dist_type), - "i128" => add_test!(AtomicArray, i128, len, dist_type), - "isize" => add_test!(AtomicArray, isize, len, dist_type), - "f32" => add_test!(AtomicArray, f32, len, dist_type), + // "u16" => add_test!(AtomicArray, u16, len, dist_type), + // "u32" => add_test!(AtomicArray, u32, len, dist_type), + // "u64" => add_test!(AtomicArray, u64, len, dist_type), + // "u128" => add_test!(AtomicArray, u128, len, dist_type), + // "usize" => add_test!(AtomicArray, usize, len, dist_type), + // "i8" => add_test!(AtomicArray, i8, len, dist_type), + // "i16" => add_test!(AtomicArray, i16, len, dist_type), + // "i32" => add_test!(AtomicArray, i32, len, dist_type), + // "i64" => add_test!(AtomicArray, i64, len, dist_type), + // "i128" => add_test!(AtomicArray, i128, len, dist_type), + // "isize" => add_test!(AtomicArray, isize, len, dist_type), + // "f32" => add_test!(AtomicArray, f32, len, dist_type), "f64" => 
add_test!(AtomicArray, f64, len, dist_type), "input" => input_test!(AtomicArray, len, dist_type), _ => eprintln!("unsupported element type"), }, - "LocalLockArray" => match elem.as_str() { - "u8" => add_test!(LocalLockArray, u8, len, dist_type), - "u16" => add_test!(LocalLockArray, u16, len, dist_type), - "u32" => add_test!(LocalLockArray, u32, len, dist_type), - "u64" => add_test!(LocalLockArray, u64, len, dist_type), - "u128" => add_test!(LocalLockArray, u128, len, dist_type), - "usize" => add_test!(LocalLockArray, usize, len, dist_type), - "i8" => add_test!(LocalLockArray, i8, len, dist_type), - "i16" => add_test!(LocalLockArray, i16, len, dist_type), - "i32" => add_test!(LocalLockArray, i32, len, dist_type), - "i64" => add_test!(LocalLockArray, i64, len, dist_type), - "i128" => add_test!(LocalLockArray, i128, len, dist_type), - "isize" => add_test!(LocalLockArray, isize, len, dist_type), - "f32" => add_test!(LocalLockArray, f32, len, dist_type), - "f64" => add_test!(LocalLockArray, f64, len, dist_type), - "input" => input_test!(LocalLockArray, len, dist_type), - _ => eprintln!("unsupported element type"), - }, - "GlobalLockArray" => match elem.as_str() { - "u8" => add_test!(GlobalLockArray, u8, len, dist_type), - "u16" => add_test!(GlobalLockArray, u16, len, dist_type), - "u32" => add_test!(GlobalLockArray, u32, len, dist_type), - "u64" => add_test!(GlobalLockArray, u64, len, dist_type), - "u128" => add_test!(GlobalLockArray, u128, len, dist_type), - "usize" => add_test!(GlobalLockArray, usize, len, dist_type), - "i8" => add_test!(GlobalLockArray, i8, len, dist_type), - "i16" => add_test!(GlobalLockArray, i16, len, dist_type), - "i32" => add_test!(GlobalLockArray, i32, len, dist_type), - "i64" => add_test!(GlobalLockArray, i64, len, dist_type), - "i128" => add_test!(GlobalLockArray, i128, len, dist_type), - "isize" => add_test!(GlobalLockArray, isize, len, dist_type), - "f32" => add_test!(GlobalLockArray, f32, len, dist_type), - "f64" => 
add_test!(GlobalLockArray, f64, len, dist_type), - "input" => input_test!(GlobalLockArray, len, dist_type), - _ => {} //eprintln!("unsupported element type"), - }, + // "LocalLockArray" => match elem.as_str() { + // "u8" => add_test!(LocalLockArray, u8, len, dist_type), + // "u16" => add_test!(LocalLockArray, u16, len, dist_type), + // "u32" => add_test!(LocalLockArray, u32, len, dist_type), + // "u64" => add_test!(LocalLockArray, u64, len, dist_type), + // "u128" => add_test!(LocalLockArray, u128, len, dist_type), + // "usize" => add_test!(LocalLockArray, usize, len, dist_type), + // "i8" => add_test!(LocalLockArray, i8, len, dist_type), + // "i16" => add_test!(LocalLockArray, i16, len, dist_type), + // "i32" => add_test!(LocalLockArray, i32, len, dist_type), + // "i64" => add_test!(LocalLockArray, i64, len, dist_type), + // "i128" => add_test!(LocalLockArray, i128, len, dist_type), + // "isize" => add_test!(LocalLockArray, isize, len, dist_type), + // "f32" => add_test!(LocalLockArray, f32, len, dist_type), + // "f64" => add_test!(LocalLockArray, f64, len, dist_type), + // "input" => input_test!(LocalLockArray, len, dist_type), + // _ => eprintln!("unsupported element type"), + // }, + // "GlobalLockArray" => match elem.as_str() { + // "u8" => add_test!(GlobalLockArray, u8, len, dist_type), + // "u16" => add_test!(GlobalLockArray, u16, len, dist_type), + // "u32" => add_test!(GlobalLockArray, u32, len, dist_type), + // "u64" => add_test!(GlobalLockArray, u64, len, dist_type), + // "u128" => add_test!(GlobalLockArray, u128, len, dist_type), + // "usize" => add_test!(GlobalLockArray, usize, len, dist_type), + // "i8" => add_test!(GlobalLockArray, i8, len, dist_type), + // "i16" => add_test!(GlobalLockArray, i16, len, dist_type), + // "i32" => add_test!(GlobalLockArray, i32, len, dist_type), + // "i64" => add_test!(GlobalLockArray, i64, len, dist_type), + // "i128" => add_test!(GlobalLockArray, i128, len, dist_type), + // "isize" => add_test!(GlobalLockArray, isize, 
len, dist_type), + // "f32" => add_test!(GlobalLockArray, f32, len, dist_type), + // "f64" => add_test!(GlobalLockArray, f64, len, dist_type), + // "input" => input_test!(GlobalLockArray, len, dist_type), + // _ => {} //eprintln!("unsupported element type"), + // }, _ => eprintln!("unsupported array type"), } }