Skip to content

Commit

Permalink
fix sub array block distribution calculations
Browse files Browse the repository at this point in the history
  • Loading branch information
rdfriese committed Aug 7, 2024
1 parent 3b844fd commit 49c5976
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 110 deletions.
2 changes: 1 addition & 1 deletion lamellar_run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ for pe in $(seq 0 $ENDPE); do
echo "more threads ${E_CORE} than cores ${NPROC} "
exit
fi
LAMELLAR_BACKEND="shmem" LAMELLAR_MEM_SIZE=$((1*1024*1024*1024)) LAMELLAR_THREADS=$((THREADS)) LAMELLAR_NUM_PES=$NUMPES LAMELLAR_PE_ID=$pe LAMELLAR_JOB_ID=$JOBID $bin "${@:2}" &>> ${pe}_out.txt &
LAMELLAR_BACKEND="shmem" LAMELLAR_MEM_SIZE=$((1*1024*1024*1024)) LAMELLAR_THREADS=$((THREADS)) LAMELLAR_NUM_PES=$NUMPES LAMELLAR_PE_ID=$pe LAMELLAR_JOB_ID=$JOBID $bin "${@:2}" &
S_CORE=$(($E_CORE ))
E_CORE=$(($S_CORE + $THREADS))
done
Expand Down
8 changes: 4 additions & 4 deletions src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,10 @@ crate::inventory::collect!(ReduceKey);
// lamellar_impl::generate_reductions_for_type_rt!(true, u8, usize);
// lamellar_impl::generate_ops_for_type_rt!(true, true, true, u8, usize);

// lamellar_impl::generate_reductions_for_type_rt!(false, f32);
// lamellar_impl::generate_ops_for_type_rt!(false, false, false, f32);
// lamellar_impl::generate_reductions_for_type_rt!(false, u128);
// lamellar_impl::generate_ops_for_type_rt!(true, false, true, u128);
// lamellar_impl::generate_reductions_for_type_rt!(false, f64);
// lamellar_impl::generate_ops_for_type_rt!(false, false, false, f64);
lamellar_impl::generate_reductions_for_type_rt!(false, u128);
lamellar_impl::generate_ops_for_type_rt!(true, false, true, u128);

lamellar_impl::generate_reductions_for_type_rt!(true, u8, u16, u32, u64, usize);
lamellar_impl::generate_reductions_for_type_rt!(false, u128);
Expand Down
99 changes: 60 additions & 39 deletions src/array/unsafe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ pub(crate) struct UnsafeArrayInnerWeak {
pub(crate) data: WeakDarc<UnsafeArrayData>,
pub(crate) distribution: Distribution,
orig_elem_per_pe: usize,
orig_remaining_elems: usize,
elem_size: usize, //for bytes array will be size of T, for T array will be 1
offset: usize, //relative to size of T
size: usize, //relative to size of T
orig_remaining_elems: usize, // the number of elements that can't be evenly divided amongst all PEs
elem_size: usize, //for bytes array will be size of T, for T array will be 1
offset: usize, //relative to size of T
size: usize, //relative to size of T
sub: bool,
}

Expand Down Expand Up @@ -1161,8 +1161,16 @@ impl<T: Dist> LamellarArray<T> for UnsafeArray<T> {
//#[tracing::instrument(skip_all)]
fn pe_and_offset_for_global_index(&self, index: usize) -> Option<(usize, usize)> {
if self.inner.sub {
// println!("sub array {index}");
let pe = self.inner.pe_for_dist_index(index)?;
// println!("pe: {pe}");
let offset = self.inner.pe_sub_offset_for_dist_index(pe, index)?;
// println!(
// "sub array index {index} pe {pe} offset {offset} size {} {} {}",
// self.inner.size,
// self.inner.num_elems_pe(0),
// self.inner.num_elems_pe(1)
// );
Some((pe, offset))
} else {
self.inner.full_pe_and_offset_for_global_index(index)
Expand Down Expand Up @@ -1222,7 +1230,14 @@ impl<T: Dist> SubArray<T> for UnsafeArray<T> {
start, end, self.inner.size
);
}
// println!("new inner {:?} {:?} {:?} {:?}",start,end,end-start,self.sub_array_offset + start);
// println!(
// "new inner start {:?} end {:?} size {:?} cur offset {:?} cur size {:?}",
// start,
// end,
// end - start,
// self.inner.offset,
// self.inner.size
// );
let mut inner = self.inner.clone();
inner.offset += start;
inner.size = end - start;
Expand Down Expand Up @@ -1534,22 +1549,21 @@ impl UnsafeArrayInner {
match self.distribution {
Distribution::Block => {
let rem_index = self.orig_remaining_elems * (self.orig_elem_per_pe + 1);
let mut elem_per_pe = self.orig_elem_per_pe;
if rem_index < self.size {
elem_per_pe += 1;
} else {
global_index = global_index - rem_index;
}

let (pe, offset) = if global_index < rem_index {
(global_index / elem_per_pe, global_index % elem_per_pe)
//index is on a pe with extra elems
let pe = global_index / (self.orig_elem_per_pe + 1); // accounts for the remaining elems
let offset = global_index - (pe * (self.orig_elem_per_pe + 1));
(pe, offset)
} else {
(
rem_index / elem_per_pe
+ (global_index - rem_index) / self.orig_elem_per_pe,
global_index % self.orig_elem_per_pe,
)
//index is on a pe without extra elems
let temp_index = global_index - rem_index; //get the remaining index after accounting for PEs with extra elements
let temp_pe = temp_index / self.orig_elem_per_pe; //the pe after accounting for PEs with extra elements
let pe = self.orig_remaining_elems // N pes that have extra elements
+ temp_pe;
let offset = temp_index - (temp_pe * self.orig_elem_per_pe);
(pe, offset)
};

Some((pe, offset))
}
Distribution::Cyclic => {
Expand All @@ -1568,21 +1582,19 @@ impl UnsafeArrayInner {
//index is relative to (sub)array (i.e. index=0 doesnt necessarily live on pe=0)
// //#[tracing::instrument(skip_all)]
pub(crate) fn pe_for_dist_index(&self, index: usize) -> Option<usize> {
// println!("pe_for_dist_index {index} {}", self.size);
if self.size > index {
let mut global_index = index + self.offset;

match self.distribution {
Distribution::Block => {
let rem_index = self.orig_remaining_elems * (self.orig_elem_per_pe + 1);
let mut elem_per_pe = self.orig_elem_per_pe;
if rem_index < self.size {
elem_per_pe += 1;
} else {
global_index = global_index - rem_index;
}
let pe = if global_index < rem_index {
global_index / elem_per_pe
global_index / (self.orig_elem_per_pe + 1) // accounts for the remaining elems
} else {
rem_index / elem_per_pe + (global_index - rem_index) / self.orig_elem_per_pe
self.orig_remaining_elems // N pes that have extra elements
+ ((global_index - rem_index) //get the remaining index after accounting for PEs with extra elements
/ self.orig_elem_per_pe)
};
Some(pe)
}
Expand All @@ -1596,21 +1608,21 @@ impl UnsafeArrayInner {
//index relative to subarray, return offset relative to subarray
// //#[tracing::instrument(skip_all)]
pub(crate) fn pe_full_offset_for_dist_index(&self, pe: usize, index: usize) -> Option<usize> {
// println!("pe_full_offset_for_dist_index pe {pe} index {index}");
let mut global_index = self.offset + index;

match self.distribution {
Distribution::Block => {
let rem_index = self.orig_remaining_elems * (self.orig_elem_per_pe + 1);
let mut elem_per_pe = self.orig_elem_per_pe;
if rem_index < self.size {
elem_per_pe += 1;
} else {
global_index = global_index - rem_index;
}
// println!("\tindex: {index} offset {} size {} global_index {global_index} rem_index {rem_index}",self.offset, self.size);
let offset = if global_index < rem_index {
global_index % elem_per_pe
//index is on a pe with extra elems
global_index - (pe * (self.orig_elem_per_pe + 1))
} else {
global_index % self.orig_elem_per_pe
//index is on a pe without extra elems
let temp_index = global_index - rem_index; //get the remaining index after accounting for PEs with extra elements
let temp_pe = temp_index / self.orig_elem_per_pe; //the pe after accounting for PEs with extra elements

temp_index - (temp_pe * self.orig_elem_per_pe)
};
Some(offset)
}
Expand All @@ -1627,13 +1639,22 @@ impl UnsafeArrayInner {

//index relative to subarray, return offset relative to subarray
pub(crate) fn pe_sub_offset_for_dist_index(&self, pe: usize, index: usize) -> Option<usize> {
let offset = self.pe_full_offset_for_dist_index(pe, index)?;
// println!(
// "pe_sub_offset_for_dist_index index {index} pe {pe} offset {}",
// self.offset
// );
let start_pe = self.pe_for_dist_index(0)?;

match self.distribution {
Distribution::Block => {
if self.offset <= offset {
Some(offset - self.offset)
if start_pe == pe {
if index < self.size {
Some(index)
} else {
None
}
} else {
None
self.pe_full_offset_for_dist_index(pe, index)
}
}
Distribution::Cyclic => {
Expand Down
1 change: 1 addition & 0 deletions tests/add.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ macro_rules! create_add_tests {
create_add_tests!(
(UnsafeArray, LocalLockArray, AtomicArray), // (UnsafeArray, AtomicArray, GenericAtomicArray, LocalLockArray),
("Block", "Cyclic"),
// (u8, f64),
(u8, u16, u32, u128, usize, i8, i16, i32, i128, isize, f32, f64),
(2, 3, 4),
(4, 19, 128)
Expand Down
132 changes: 66 additions & 66 deletions tests/array/arithmetic_ops/add_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,78 +487,78 @@ fn main() {
};

match array.as_str() {
"UnsafeArray" => match elem.as_str() {
"u8" => add_test!(UnsafeArray, u8, len, dist_type),
"u16" => add_test!(UnsafeArray, u16, len, dist_type),
"u32" => add_test!(UnsafeArray, u32, len, dist_type),
"u64" => add_test!(UnsafeArray, u64, len, dist_type),
"u128" => add_test!(UnsafeArray, u128, len, dist_type),
"usize" => add_test!(UnsafeArray, usize, len, dist_type),
"i8" => add_test!(UnsafeArray, i8, len, dist_type),
"i16" => add_test!(UnsafeArray, i16, len, dist_type),
"i32" => add_test!(UnsafeArray, i32, len, dist_type),
"i64" => add_test!(UnsafeArray, i64, len, dist_type),
"i128" => add_test!(UnsafeArray, i128, len, dist_type),
"isize" => add_test!(UnsafeArray, isize, len, dist_type),
"f32" => add_test!(UnsafeArray, f32, len, dist_type),
"f64" => add_test!(UnsafeArray, f64, len, dist_type),
"input" => input_test!(UnsafeArray, len, dist_type),
_ => eprintln!("unsupported element type"),
},
// "UnsafeArray" => match elem.as_str() {
// "u8" => add_test!(UnsafeArray, u8, len, dist_type),
// "u16" => add_test!(UnsafeArray, u16, len, dist_type),
// "u32" => add_test!(UnsafeArray, u32, len, dist_type),
// "u64" => add_test!(UnsafeArray, u64, len, dist_type),
// "u128" => add_test!(UnsafeArray, u128, len, dist_type),
// "usize" => add_test!(UnsafeArray, usize, len, dist_type),
// "i8" => add_test!(UnsafeArray, i8, len, dist_type),
// "i16" => add_test!(UnsafeArray, i16, len, dist_type),
// "i32" => add_test!(UnsafeArray, i32, len, dist_type),
// "i64" => add_test!(UnsafeArray, i64, len, dist_type),
// "i128" => add_test!(UnsafeArray, i128, len, dist_type),
// "isize" => add_test!(UnsafeArray, isize, len, dist_type),
// "f32" => add_test!(UnsafeArray, f32, len, dist_type),
// "f64" => add_test!(UnsafeArray, f64, len, dist_type),
// "input" => input_test!(UnsafeArray, len, dist_type),
// _ => eprintln!("unsupported element type"),
// },
"AtomicArray" => match elem.as_str() {
"u8" => add_test!(AtomicArray, u8, len, dist_type),
"u16" => add_test!(AtomicArray, u16, len, dist_type),
"u32" => add_test!(AtomicArray, u32, len, dist_type),
"u64" => add_test!(AtomicArray, u64, len, dist_type),
"u128" => add_test!(AtomicArray, u128, len, dist_type),
"usize" => add_test!(AtomicArray, usize, len, dist_type),
"i8" => add_test!(AtomicArray, i8, len, dist_type),
"i16" => add_test!(AtomicArray, i16, len, dist_type),
"i32" => add_test!(AtomicArray, i32, len, dist_type),
"i64" => add_test!(AtomicArray, i64, len, dist_type),
"i128" => add_test!(AtomicArray, i128, len, dist_type),
"isize" => add_test!(AtomicArray, isize, len, dist_type),
"f32" => add_test!(AtomicArray, f32, len, dist_type),
// "u16" => add_test!(AtomicArray, u16, len, dist_type),
// "u32" => add_test!(AtomicArray, u32, len, dist_type),
// "u64" => add_test!(AtomicArray, u64, len, dist_type),
// "u128" => add_test!(AtomicArray, u128, len, dist_type),
// "usize" => add_test!(AtomicArray, usize, len, dist_type),
// "i8" => add_test!(AtomicArray, i8, len, dist_type),
// "i16" => add_test!(AtomicArray, i16, len, dist_type),
// "i32" => add_test!(AtomicArray, i32, len, dist_type),
// "i64" => add_test!(AtomicArray, i64, len, dist_type),
// "i128" => add_test!(AtomicArray, i128, len, dist_type),
// "isize" => add_test!(AtomicArray, isize, len, dist_type),
// "f32" => add_test!(AtomicArray, f32, len, dist_type),
"f64" => add_test!(AtomicArray, f64, len, dist_type),
"input" => input_test!(AtomicArray, len, dist_type),
_ => eprintln!("unsupported element type"),
},
"LocalLockArray" => match elem.as_str() {
"u8" => add_test!(LocalLockArray, u8, len, dist_type),
"u16" => add_test!(LocalLockArray, u16, len, dist_type),
"u32" => add_test!(LocalLockArray, u32, len, dist_type),
"u64" => add_test!(LocalLockArray, u64, len, dist_type),
"u128" => add_test!(LocalLockArray, u128, len, dist_type),
"usize" => add_test!(LocalLockArray, usize, len, dist_type),
"i8" => add_test!(LocalLockArray, i8, len, dist_type),
"i16" => add_test!(LocalLockArray, i16, len, dist_type),
"i32" => add_test!(LocalLockArray, i32, len, dist_type),
"i64" => add_test!(LocalLockArray, i64, len, dist_type),
"i128" => add_test!(LocalLockArray, i128, len, dist_type),
"isize" => add_test!(LocalLockArray, isize, len, dist_type),
"f32" => add_test!(LocalLockArray, f32, len, dist_type),
"f64" => add_test!(LocalLockArray, f64, len, dist_type),
"input" => input_test!(LocalLockArray, len, dist_type),
_ => eprintln!("unsupported element type"),
},
"GlobalLockArray" => match elem.as_str() {
"u8" => add_test!(GlobalLockArray, u8, len, dist_type),
"u16" => add_test!(GlobalLockArray, u16, len, dist_type),
"u32" => add_test!(GlobalLockArray, u32, len, dist_type),
"u64" => add_test!(GlobalLockArray, u64, len, dist_type),
"u128" => add_test!(GlobalLockArray, u128, len, dist_type),
"usize" => add_test!(GlobalLockArray, usize, len, dist_type),
"i8" => add_test!(GlobalLockArray, i8, len, dist_type),
"i16" => add_test!(GlobalLockArray, i16, len, dist_type),
"i32" => add_test!(GlobalLockArray, i32, len, dist_type),
"i64" => add_test!(GlobalLockArray, i64, len, dist_type),
"i128" => add_test!(GlobalLockArray, i128, len, dist_type),
"isize" => add_test!(GlobalLockArray, isize, len, dist_type),
"f32" => add_test!(GlobalLockArray, f32, len, dist_type),
"f64" => add_test!(GlobalLockArray, f64, len, dist_type),
"input" => input_test!(GlobalLockArray, len, dist_type),
_ => {} //eprintln!("unsupported element type"),
},
// "LocalLockArray" => match elem.as_str() {
// "u8" => add_test!(LocalLockArray, u8, len, dist_type),
// "u16" => add_test!(LocalLockArray, u16, len, dist_type),
// "u32" => add_test!(LocalLockArray, u32, len, dist_type),
// "u64" => add_test!(LocalLockArray, u64, len, dist_type),
// "u128" => add_test!(LocalLockArray, u128, len, dist_type),
// "usize" => add_test!(LocalLockArray, usize, len, dist_type),
// "i8" => add_test!(LocalLockArray, i8, len, dist_type),
// "i16" => add_test!(LocalLockArray, i16, len, dist_type),
// "i32" => add_test!(LocalLockArray, i32, len, dist_type),
// "i64" => add_test!(LocalLockArray, i64, len, dist_type),
// "i128" => add_test!(LocalLockArray, i128, len, dist_type),
// "isize" => add_test!(LocalLockArray, isize, len, dist_type),
// "f32" => add_test!(LocalLockArray, f32, len, dist_type),
// "f64" => add_test!(LocalLockArray, f64, len, dist_type),
// "input" => input_test!(LocalLockArray, len, dist_type),
// _ => eprintln!("unsupported element type"),
// },
// "GlobalLockArray" => match elem.as_str() {
// "u8" => add_test!(GlobalLockArray, u8, len, dist_type),
// "u16" => add_test!(GlobalLockArray, u16, len, dist_type),
// "u32" => add_test!(GlobalLockArray, u32, len, dist_type),
// "u64" => add_test!(GlobalLockArray, u64, len, dist_type),
// "u128" => add_test!(GlobalLockArray, u128, len, dist_type),
// "usize" => add_test!(GlobalLockArray, usize, len, dist_type),
// "i8" => add_test!(GlobalLockArray, i8, len, dist_type),
// "i16" => add_test!(GlobalLockArray, i16, len, dist_type),
// "i32" => add_test!(GlobalLockArray, i32, len, dist_type),
// "i64" => add_test!(GlobalLockArray, i64, len, dist_type),
// "i128" => add_test!(GlobalLockArray, i128, len, dist_type),
// "isize" => add_test!(GlobalLockArray, isize, len, dist_type),
// "f32" => add_test!(GlobalLockArray, f32, len, dist_type),
// "f64" => add_test!(GlobalLockArray, f64, len, dist_type),
// "input" => input_test!(GlobalLockArray, len, dist_type),
// _ => {} //eprintln!("unsupported element type"),
// },
_ => eprintln!("unsupported array type"),
}
}

0 comments on commit 49c5976

Please sign in to comment.