diff --git a/examples/2d_linear_program.h b/examples/2d_linear_program.h index a43ee3a..d6054b5 100644 --- a/examples/2d_linear_program.h +++ b/examples/2d_linear_program.h @@ -77,7 +77,7 @@ auto linear_program_2d(const constraints& H_in, constraint c) { long top = std::min(2*i, n); // check for a violating constraint from i to top long loc = parlay::reduce(parlay::delayed_tabulate(top-i, [&] (long j) { - return violate(p, H[i+j]) ? i+j : n;}), parlay::minimum()); + return violate(p, H[i+j]) ? i+j : n;}), parlay::minimum()); if (loc == n) i = top; // no violating constraint found, repeat and double again else { // found a violating constraint at location loc @@ -85,11 +85,11 @@ auto linear_program_2d(const constraints& H_in, constraint c) { // i.e. H[loc] x c and h x c have opposite signs coord cr = cross(H[loc],c); auto Hf = parlay::filter(H.cut(0,loc), [&] (constraint h) { - return cr * cross(h,c) < 0;}); + return cr * cross(h,c) < 0;}); // find the tightest such constraint auto min = [&] (constraint a, constraint b) { - return cr * (project(H[loc], a) - project(H[loc], b)) > 0 ? a : b;}; + return cr * (project(H[loc], a) - project(H[loc], b)) > 0 ? a : b;}; constraint cx = parlay::reduce(Hf, parlay::binary_op(min,constraint{0.,0.,0.})); // update the optimal point and the index i diff --git a/examples/3d_range.h b/examples/3d_range.h index c8e42e2..f8dcc3b 100644 --- a/examples/3d_range.h +++ b/examples/3d_range.h @@ -75,8 +75,8 @@ struct search { leaf* TL = static_cast(T); for (int i = 0; i < TL->size; i++) if (TL->pts[i].id != p.id && - distance_squared(TL->pts[i].pnt) < r * r) - in_range.push_back(TL->pts[i].id); } + distance_squared(TL->pts[i].pnt) < r * r) + in_range.push_back(TL->pts[i].id); } // looks for points within range for p in subtree rooted at T. // Can return immediately if radius does not intersect the box. @@ -85,8 +85,8 @@ struct search { if (T->is_leaf) add_leaf(T); else { interior* TI = static_cast(T); - range_search_down(TI->left); - range_search_down(TI->right); + range_search_down(TI->left); + range_search_down(TI->right); } } } diff --git a/examples/BFS.h b/examples/BFS.h index 4cf3a62..675c58c 100644 --- a/examples/BFS.h +++ b/examples/BFS.h @@ -57,9 +57,9 @@ auto BFS(vertex start, const graph& G) { // keep the v that succeed in setting the visited array frontier = delayed::to_sequence(delayed::map_maybe(out, [&] (vertex v) { - bool expected = false; + bool expected = false; if ((!visited[v]) && visited[v].compare_exchange_strong(expected, true)) - return std::optional(v); + return std::optional(v); return std::optional();})); } diff --git a/examples/bigint_add.h b/examples/bigint_add.h index ae1ffb4..e4b3262 100644 --- a/examples/bigint_add.h +++ b/examples/bigint_add.h @@ -59,20 +59,20 @@ bigint add(const Bigint1& a, const Bigint2& b, bool extra_one=false) { } else { // do in parallel // check which digits will carry or propagate auto c = delayed::tabulate(na, [&] (long i) { - double_digit s = a[i] + static_cast(B(i)); - s += (i == 0 && extra_one); - return static_cast(2 * (s == mask) + (s >> digit_len));}); + double_digit s = a[i] + static_cast(B(i)); + s += (i == 0 && extra_one); + return static_cast(2 * (s == mask) + (s >> digit_len));}); // use scan to do the propagation auto f = [] (carry a, carry b) {return (b == propagate) ?
a : b;}; auto cc = delayed::scan(c, parlay::binary_op(f, propagate)).first; auto z = delayed::zip(cc, parlay::iota(na)); result = delayed::to_sequence(delayed::map(z, [&] (auto p) { - auto [ci, i] = p; - return a[i] + B(i) + ci;})); + auto [ci, i] = p; + return a[i] + B(i) + ci;})); //auto cc = parlay::scan(c, parlay::binary_op(f, propagate)).first; //result = parlay::tabulate(na, [&] (long i) { - // return a[i] + B(i) + cc[i];}); + // return a[i] + B(i) + cc[i];}); } if ((a_sign == b_sign) && ((result[na-1] >> (digit_len - 1)) != a_sign)) result.push_back(a_sign ? mask : 1u); @@ -83,5 +83,5 @@ template bigint subtract(const Bigint1& a, const Bigint2& b) { // effectively negate b and add return add(a, delayed::map(b, [] (auto bv) {return ~bv;}), - true); + true); } diff --git a/examples/boruvka.h b/examples/boruvka.h index b4b7dd1..a3b207b 100644 --- a/examples/boruvka.h +++ b/examples/boruvka.h @@ -16,9 +16,9 @@ using edge = std::pair; using w_edge = std::pair; parlay::sequence boruvka(const parlay::sequence& E, - const parlay::sequence V, - parlay::sequence>& W, - parlay::sequence& P) { + const parlay::sequence V, + parlay::sequence>& W, + parlay::sequence& P) { //std::cout << E.size() << ", " << V.size() << std::endl; if (E.size() == 0) return parlay::sequence(); @@ -36,7 +36,7 @@ parlay::sequence boruvka(const parlay::sequence& E, return (W[u] == e.second || W[v] == e.second);}); auto V_new = star_contract(map(Es, [] (w_edge e) {return e.first;}), - V, P, parlay::random_generator(0)); + V, P, parlay::random_generator(0)); // update edges to new endpoints and filter out self edges auto ES = parlay::delayed::map(E, [&] (w_edge e) { diff --git a/examples/box_kdtree.h b/examples/box_kdtree.h index b92478a..7f837b2 100644 --- a/examples/box_kdtree.h +++ b/examples/box_kdtree.h @@ -74,7 +74,7 @@ struct tree_node { bool is_leaf() {return left == nullptr;} tree_node(tree_node* L, tree_node* R, - int cut_dim, float cut_off, Bounding_Box B) + int cut_dim, float cut_off, Bounding_Box B) : left(L), right(R), cut_dim(cut_dim), cut_off(cut_off), box(B) { n = L->n + R->n; @@ -99,9 +99,9 @@ struct tree_node { if (!is_leaf()) { if (left == nullptr || right == nullptr) abort(); parlay::par_do_if(n > 1000, - [&] () { node_allocator.retire(left);}, - [&] () { node_allocator.retire(right);} - );}; + [&] () { node_allocator.retire(left);}, + [&] () { node_allocator.retire(right);} + );}; } }; @@ -158,8 +158,8 @@ cut_info best_cut(parlay::sequence const &E, range r, range r1, range r2) using events_pair = std::pair, parlay::sequence>; events_pair split_events(const parlay::sequence &box_ranges, - const parlay::sequence &events, - float cut_off) { + const parlay::sequence &events, + float cut_off) { index_t n = events.size(); auto lower = parlay::sequence::uninitialized(n); auto upper = parlay::sequence::uninitialized(n); @@ -173,9 +173,9 @@ events_pair split_events(const parlay::sequence &box_ranges, // n is the number of events (i.e. 
twice the number of triangles) tree_node* generate_node(Ranges &boxes, - Events events, - Bounding_Box B, - size_t maxDepth) { + Events events, + Bounding_Box B, + size_t maxDepth) { index_t n = events[0].size(); if (n <= 2 || maxDepth == 0) return tree_node::node_allocator.allocate(std::move(events), n, B); @@ -218,9 +218,9 @@ tree_node* generate_node(Ranges &boxes, for (int i=0; i < 3; i++) events[i] = parlay::sequence(); tree_node *L, *R; parlay::par_do([&] () {L = generate_node(boxes, std::move(left_events), - BBL, maxDepth-1);}, - [&] () {R = generate_node(boxes, std::move(right_events), - BBR, maxDepth-1);}); + BBL, maxDepth-1);}, + [&] () {R = generate_node(boxes, std::move(right_events), + BBR, maxDepth-1);}); return tree_node::node_allocator.allocate(L, R, cut_dim, cut_off, B); } @@ -236,9 +236,9 @@ auto kdtree_from_boxes(Boxes& boxes) { events[d] = parlay::sequence(2*n); ranges[d] = parlay::sequence(n); parlay::parallel_for(0, n, [&] (size_t i) { - events[d][2*i] = event(boxes[i][d][0], i, false); - events[d][2*i+1] = event(boxes[i][d][1], i, true); - ranges[d][i] = boxes[i][d];}); + events[d][2*i] = event(boxes[i][d][0], i, false); + events[d][2*i+1] = event(boxes[i][d][1], i, true); + ranges[d][i] = boxes[i][d];}); parlay::sort_inplace(events[d], [] (event a, event b) {return a.v < b.v;}); boundingBox[d] = range{events[d][0].v, events[d][2*n-1].v}; } diff --git a/examples/counting_sort.cpp b/examples/counting_sort.cpp index c96bacb..9172c1f 100644 --- a/examples/counting_sort.cpp +++ b/examples/counting_sort.cpp @@ -30,21 +30,23 @@ int main(int argc, char* argv[]) { auto data = parlay::tabulate(n, [&] (long i) { auto r = gen[i]; return dis(r);}); - + parlay::internal::timer t("Time"); parlay::sequence result(n); for (int i=0; i < 5; i++) { t.start(); counting_sort(data.begin(), data.end(), - result.begin(), - data.begin(), - num_buckets); + result.begin(), + data.begin(), + num_buckets); t.next("counting_sort"); } auto first_ten = result.head(10); auto last_ten = result.tail(10); - std::cout << "first 10 elements: " << parlay::to_chars(first_ten) << std::endl; - std::cout << "last 10 elements: " << parlay::to_chars(last_ten) << std::endl; + std::cout << "first 10 elements: " << parlay::to_chars(first_ten) + << std::endl; + std::cout << "last 10 elements: " << parlay::to_chars(last_ten) + << std::endl; } } diff --git a/examples/counting_sort.h b/examples/counting_sort.h index b8c63f6..fd880b9 100644 --- a/examples/counting_sort.h +++ b/examples/counting_sort.h @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -13,10 +14,20 @@ // of each key there are. It then uses scan to calculate the offsets // for each bucket in each partition, and does a final pass placing // all keys in their correct position.
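
[Editor's note] To make the count/scan/place structure described in the counting_sort.h comment above concrete, here is a minimal sequential sketch in standard C++. The function name and types are illustrative, not the parlay version being patched; the parallel version additionally splits the input into partitions and keeps one counter array per partition, which is what the transpose and scan in the patched code are for.

#include <cstddef>
#include <vector>

// Sequential counting sort: keys[i] in [0, num_buckets) determines
// where in[i] goes.  Returns bucket offsets (size num_buckets + 1).
std::vector<std::size_t> counting_sort_seq(const std::vector<int>& in,
                                           const std::vector<int>& keys,
                                           std::vector<int>& out,
                                           int num_buckets) {
  std::vector<std::size_t> counts(num_buckets + 1, 0);
  // 1. count how many of each key there are
  for (int k : keys) counts[k + 1]++;
  // 2. an exclusive scan (prefix sum) turns counts into bucket offsets
  for (int b = 0; b < num_buckets; b++) counts[b + 1] += counts[b];
  // 3. final pass places each element at the next free slot of its bucket
  std::vector<std::size_t> next(counts.begin(), counts.end() - 1);
  out.resize(in.size());
  for (std::size_t i = 0; i < in.size(); i++) out[next[keys[i]]++] = in[i];
  return counts;   // counts[b] .. counts[b+1] delimit bucket b in out
}
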
+// +// For input of size n, and for m buckets +// Work = O(n) +// Span = O(m + n / m) // ************************************************************** using counter_type = unsigned long; +void prefetch(void* l) { +#if defined(__GNUC__) || defined(__clang__) + __builtin_prefetch (l); +#endif +} + // ************************************************************** // Input: // begin and end iterators for the values to be rearranged @@ -30,65 +41,59 @@ using counter_type = unsigned long; // ************************************************************** template parlay::sequence -counting_sort(const InIt& begin, const InIt& end, +counting_sort(const InIt& begin, const InIt& end, OutIt out, const KeyIt& keys, long num_buckets) { - long n = end - begin; + counter_type n = end - begin; if (n == 0) return parlay::sequence(1, 0); - long num_parts = std::min(1000l, n / (num_buckets * 64) + 1); - long part_size = (n - 1)/num_parts + 1; + long num_parts = std::min(1000l, n / (num_buckets * 64) + 1); + counter_type part_n = (n - 1)/num_parts + 1; + long m = num_buckets * num_parts; // first count buckets within each partition - auto counts = parlay::sequence::uninitialized(num_buckets * num_parts); + auto counts = parlay::sequence::uninitialized(m); parlay::parallel_for(0, num_parts, [&] (long i) { - long start = i * part_size; - long end = std::min(start + part_size, n); - for (long j = 0; j < num_buckets; j++) counts[i*num_buckets + j] = 0; - for (long j = start; j < end; j++) counts[i*num_buckets + keys[j]]++; + std::vector local_counts(num_buckets); + for (long j = 0; j < num_buckets; j++) + local_counts[j] = 0; + for (long j = i * part_n; j < std::min((i+1) * part_n, n); j++) + local_counts[keys[j]]++; + // transpose so equal buckets are adjacent + for (long j = 0; j < num_buckets; j++) + counts[num_parts * j + i] = local_counts[j]; }, 1); - // transpose the counts if more than one part - parlay::sequence trans_counts; - if (num_parts > 1) { - trans_counts = parlay::sequence::uninitialized(num_buckets * num_parts); - parlay::parallel_for(0, num_buckets, [&] (long i) { - for (size_t j = 0; j < num_parts; j++) - trans_counts[i* num_parts + j] = counts[j * num_buckets + i];}, 1); - } else trans_counts = std::move(counts); - // scan for offsets for all buckets - parlay::scan_inplace(trans_counts); + parlay::scan_inplace(counts); // go back over partitions to place in final location parlay::parallel_for(0, num_parts, [&] (long i) { - long start = i * part_size; - long end = std::min(start + part_size, n); - parlay::sequence local_offsets(num_buckets); - - // transpose back + std::vector local_offsets(num_buckets); + // transpose back into local offsets for (long j = 0; j < num_buckets; j++) - local_offsets[j] = trans_counts[num_parts * j + i]; - + local_offsets[j] = counts[num_parts * j + i]; // copy to output - for (long j = start; j < end; j++) { + for (long j = i * part_n; j < std::min((i+1) * part_n, n); j++) { counter_type k = local_offsets[keys[j]]++; - // prefetching speeds up the code - #if defined(__GNUC__) || defined(__clang__) - __builtin_prefetch (((char*) &out[k]) + 64); - #endif + prefetch(((char*) &out[k]) + 64); out[k] = begin[j]; }}, 1); - return parlay::tabulate(num_buckets+1, [&] (long i) { - return (i == num_buckets) ? (counter_type) n : trans_counts[i * num_parts];}); + // return offsets for each bucket. + // includes extra element at end containing n + return parlay::tabulate(num_buckets + 1, [&] (long i) { + return (i == num_buckets) ? 
n : counts[i * num_parts];}); } -// A version that uses ranges as inputs and generates its own output sequence +// A wrapper that uses ranges as inputs and generates its own output +// sequence template auto counting_sort(const InRange& in, const KeysRange& keys, - long num_buckets) { - auto out = parlay::sequence::uninitialized(in.size()); - auto offsets = counting_sort(in.begin(), in.end(), out.begin(), keys.begin(), - num_buckets); + long num_buckets) { + using V = typename InRange::value_type; + auto out = parlay::sequence::uninitialized(in.size()); + auto offsets = counting_sort(in.begin(), in.end(), + out.begin(), keys.begin(), + num_buckets); return std::pair(std::move(out), std::move(offsets)); } diff --git a/examples/fast_fourier_transform.h b/examples/fast_fourier_transform.h index f071cce..bbba27f 100644 --- a/examples/fast_fourier_transform.h +++ b/examples/fast_fourier_transform.h @@ -24,13 +24,13 @@ void fft_recursive(long n, long s, T const* in, T* out, T const* w) { out[1] = in[0] - in[s]; } else { parlay::par_do_if(n >= 256, - [&] {fft_recursive(n/2, 2*s, in, out, w);}, - [&] {fft_recursive(n/2, 2*s, in+s, out+n/2, w);}); + [&] {fft_recursive(n/2, 2*s, in, out, w);}, + [&] {fft_recursive(n/2, 2*s, in+s, out+n/2, w);}); parlay::parallel_for(0, n/2, [&] (long i) { - T p = out[i]; - T q = out[i+n/2] * w[i * s]; - out[i] = p + q; - out[i+n/2] = p - q;}, 1000); + T p = out[i]; + T q = out[i+n/2] * w[i * s]; + out[i] = p + q; + out[i+n/2] = p - q;}, 1000); } } @@ -72,16 +72,16 @@ auto complex_fft(const parlay::sequence& a) { // loop-based fft (assumes A is in bit-reveresed order) template auto fft_base(parlay::sequence& A, - const parlay::sequence& w) { + const parlay::sequence& w) { long n = A.size(); int is = n/2; for (int s = 1; s < n; s = s*2, is = is/2) for (int i = 0; i < n; i += 2*s) for (int j = 0; j < s; j++) { - T p = A[i + j]; - T q = A[i + j + s] * w[j*is]; - A[i + j] = p + q; - A[i + j + s] = p - q; + T p = A[i + j]; + T q = A[i + j + s] * w[j*is]; + A[i + j] = p + q; + A[i + j + s] = p - q; } } @@ -103,22 +103,22 @@ auto fft_transpose(const parlay::sequence>& cols, T nth_root parlay::sequence wc = powers(std::pow(nth_root, num_cols), num_rows); auto columns = parlay::tabulate(num_cols, [&] (long i) { auto c = parlay::tabulate(num_rows, [&] (long j) { - return cols[i][br[j]];}); + return cols[i][br[j]];}); fft_base(c, wc); auto w = powers(std::pow(nth_root, i), num_rows); // rotate to avoid stride problems with set-associative cache return parlay::tabulate(num_rows, [&] (long j) { - int k = (num_rows + j - i) & (num_rows - 1); - return c[k]*w[k];});}); + int k = (num_rows + j - i) & (num_rows - 1); + return c[k]*w[k];});}); // transpose and process rows br = bit_reverse(num_cols); auto wr = powers(std::pow(nth_root, num_rows), num_cols); return parlay::tabulate(num_rows, [&] (long j) { auto row = parlay::tabulate(num_cols, [&] (long i) { - return columns[i][i+j & (num_rows - 1)];}); // transpose + return columns[i][i+j & (num_rows - 1)];}); // transpose auto r = parlay::tabulate(num_cols, [&] (long i) { - return row[br[i]];}); + return row[br[i]];}); fft_base(r, wr); return r;}); } diff --git a/examples/hash_map.h b/examples/hash_map.h index 7ffc170..f63556a 100644 --- a/examples/hash_map.h +++ b/examples/hash_map.h @@ -23,9 +23,9 @@ // ************************************************************** template , - typename Equal = std::equal_to<>> + typename V, + typename Hash = parlay::hash, + typename Equal = std::equal_to<>> struct hash_map { private: using KV = 
std::pair; @@ -57,27 +57,27 @@ struct hash_map { while (true) { state status = H[i].status; if (status == full || status == tomb) { - if (H[i].key == k) { - if (status == full) return false; - else if (H[i].status.compare_exchange_strong(status, locked)) { - H[i].value = v; - H[i].status = full; - return true; - } - } else { - if (count++ == std::min(1000l,m)) { - std::cout << "Hash table overfull" << std::endl; - return false; - } - i = next_index(i); - } + if (H[i].key == k) { + if (status == full) return false; + else if (H[i].status.compare_exchange_strong(status, locked)) { + H[i].value = v; + H[i].status = full; + return true; + } + } else { + if (count++ == std::min(1000l,m)) { + std::cout << "Hash table overfull" << std::endl; + return false; + } + i = next_index(i); + } } else if (status == empty) { - if (H[i].status.compare_exchange_strong(status, locked)) { - H[i].key = k; - H[i].value = v; - H[i].status = full; - return true; - } + if (H[i].status.compare_exchange_strong(status, locked)) { + H[i].key = k; + H[i].value = v; + H[i].status = full; + return true; + } } } } @@ -96,11 +96,11 @@ struct hash_map { while (true) { if (H[i].status == empty || H[i].status == locked) return {}; if (H[i].key == k) { - state old = full; - if (H[i].status == full && - H[i].status.compare_exchange_strong(old, tomb)) - return std::move(H[i].value); - else return {}; + state old = full; + if (H[i].status == full && + H[i].status.compare_exchange_strong(old, tomb)) + return std::move(H[i].value); + else return {}; } i = next_index(i); } @@ -108,12 +108,12 @@ struct hash_map { parlay::sequence keys() { return parlay::map_maybe(H, [] (const entry &x) { - return (x.status == full) ? std::optional{x.key} : std::optional{};}); + return (x.status == full) ? std::optional{x.key} : std::optional{};}); } size_t size() { return parlay::reduce(parlay::delayed_map(H, [&] (auto const &x) -> long { - return x.status == full;})); + return x.status == full;})); } }; diff --git a/examples/integer_sort.cpp b/examples/integer_sort.cpp index 680dea3..947c884 100644 --- a/examples/integer_sort.cpp +++ b/examples/integer_sort.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -23,10 +22,10 @@ int main(int argc, char* argv[]) { catch (...) 
{ std::cout << usage << std::endl; return 1; } using int_type = unsigned int; - + parlay::random_generator gen; std::uniform_int_distribution dis(0, n-1); - int bits = std::ceil(std::log2(n)); + int num_bits = std::ceil(std::log2(n)); // generate random int_type values between 0 and n parlay::sequence data = parlay::tabulate(n, [&] (long i) { @@ -38,11 +37,12 @@ int main(int argc, char* argv[]) { for (int i = 0; i < 5; i++) { result = data; t.start(); - ::integer_sort(result, bits); + ::integer_sort(result, num_bits); t.next("integer_sort"); } auto first_ten = result.head(10); - std::cout << "first 10 elements: " << parlay::to_chars(first_ten) << std::endl; + std::cout << "first 10 elements: " << parlay::to_chars(first_ten) + << std::endl; } } diff --git a/examples/integer_sort.h b/examples/integer_sort.h index bba0a74..2d24005 100644 --- a/examples/integer_sort.h +++ b/examples/integer_sort.h @@ -25,18 +25,21 @@ // A bottom up radix sort with 8-bits per round template -void bottom_up_radix_sort(Range in, Range out, long bits, long bot_bit, bool inplace) { +void bottom_up_radix_sort(Range in, Range out, + long num_bits, long bot_bit, bool inplace) { const long radix_bits = 8; - if (bot_bit >= bits) { + if (bot_bit >= num_bits) { if (!inplace) parlay::copy(in, out); } else { - long num_buckets = std::min(1l << radix_bits, 1l << (bits - bot_bit)); + long num_buckets = std::min(1l << radix_bits, + 1l << (num_bits - bot_bit)); // the keys are bits from bot_bit to bits auto keys = parlay::delayed::tabulate(in.size(), [&] (long i) { return (in[i] >> bot_bit) & (num_buckets-1);}); counting_sort(in.begin(), in.end(), out.begin(), keys.begin(), - num_buckets); - bottom_up_radix_sort(out, in, bits, bot_bit + radix_bits, !inplace); + num_buckets); + bottom_up_radix_sort(out, in, + num_bits, bot_bit + radix_bits, !inplace); } } @@ -51,16 +54,19 @@ void radix_sort(Range in, Range out, long bits, bool inplace) { bottom_up_radix_sort(in, out, bits, 0, inplace); else { // number of bits in radix - long radix_bits = std::min(bits, std::min(10l, std::ceil(std::log2(2*n/cutoff)))); + long radix_bits = std::min({bits, + 10l, + (long) std::ceil(std::log2(2*n/cutoff))}); long num_buckets = 1l << radix_bits; // extract the needed bits for the keys auto keys = parlay::delayed::tabulate(n, [&] (long i) { return (in[i] >> (bits - radix_bits)) & (num_buckets-1);}); - - // sort in into the out based on keys - auto offsets = counting_sort(in.begin(), in.end(), out.begin(), keys.begin(), - num_buckets); + + // sort in into out based on keys + auto offsets = counting_sort(in.begin(), in.end(), + out.begin(), keys.begin(), + num_buckets); // now recursively sort each bucket parlay::parallel_for(0, num_buckets, [&] (long i) { @@ -68,16 +74,17 @@ void radix_sort(Range in, Range out, long bits, bool inplace) { long last = offsets[i+1]; // end of bucket // note that order of in and out is flipped (as is inplace) radix_sort(out.cut(first,last), in.cut(first,last), - bits - radix_bits, !inplace); + bits - radix_bits, !inplace); }, 1); } } -// An inplace (i.e., result is in the same place as the input) integer_sort -// Requires O(n) temp space +// An inplace (i.e., result is in the same place as the input) +// integer_sort. Requires O(n) temp space. 
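
[Editor's note] For reference, the bottom-up digit-by-digit strategy used by bottom_up_radix_sort above, in a self-contained sequential form (the inplace parlay wrapper continues below). Names and types here are illustrative; the parallel code also recurses over buckets in parallel in the top-down case. Stability of the per-digit pass is what makes the round-by-round approach correct, and num_bits can be taken as std::ceil(std::log2(n)) for values below n, matching integer_sort.cpp above.

#include <cstdint>
#include <cstddef>
#include <utility>
#include <vector>

// Sequential LSD radix sort on unsigned keys, 8 bits per round,
// ping-ponging between two buffers; result ends up back in a.
void radix_sort_seq(std::vector<std::uint32_t>& a, int num_bits) {
  const int radix_bits = 8, buckets = 1 << radix_bits;
  std::vector<std::uint32_t> tmp(a.size());
  for (int bot = 0; bot < num_bits; bot += radix_bits) {
    std::vector<std::size_t> counts(buckets + 1, 0);
    auto key = [&](std::uint32_t x) { return (x >> bot) & (buckets - 1); };
    for (std::uint32_t x : a) counts[key(x) + 1]++;                // count
    for (int b = 0; b < buckets; b++) counts[b + 1] += counts[b];  // scan
    for (std::uint32_t x : a) tmp[counts[key(x)]++] = x;           // stable place
    std::swap(a, tmp);                                             // ping-pong
  }
}
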
template void integer_sort(Range& in, long bits) { - auto tmp = parlay::sequence::uninitialized(in.size()); + using V = typename Range::value_type; + auto tmp = parlay::sequence::uninitialized(in.size()); auto in_slice = parlay::make_slice(in); auto tmp_slice = parlay::make_slice(tmp); radix_sort(in_slice, tmp_slice, bits, true); diff --git a/examples/lasso_regression.h b/examples/lasso_regression.h index eb3a709..7b1aec7 100644 --- a/examples/lasso_regression.h +++ b/examples/lasso_regression.h @@ -131,7 +131,7 @@ void solve_lasso(const sparse_matrix& AT, const vector& y, // Convergence check if (step > 0) { if (max_change <= (step + 1) * delta_threshold || counter > 100) { - step--; counter = 0; + step--; counter = 0; } } else { // Only use actual objective on last step. real obj = objective(Ax, x, y, lambda); diff --git a/examples/ldd_connectivity.h b/examples/ldd_connectivity.h index 4b6c8f5..fddf637 100644 --- a/examples/ldd_connectivity.h +++ b/examples/ldd_connectivity.h @@ -30,12 +30,12 @@ ldd_connectivity(const graph& G, float beta = .5) { // Extract the remaining edges auto E_r = parlay::flatten(parlay::tabulate(n, [&] (vertex u) { - auto remain = filter(G[u], [&] (vertex v) {return P[u] != P[v];}); - return map(remain, [&] (vertex v) {return edge(P[u],P[v]);}, 1000);})); + auto remain = filter(G[u], [&] (vertex v) {return P[u] != P[v];}); + return map(remain, [&] (vertex v) {return edge(P[u],P[v]);}, 1000);})); // Extract the remaining vertices parlay::sequence V_r = parlay::pack_index(parlay::tabulate(n, [&] (vertex v) { - return P[v] == v;})); + return P[v] == v;})); // Finish off with star contraction on remaining graph auto roots = star_contract(E_r, V_r, P, parlay::random_generator(0)); diff --git a/examples/le_list.h b/examples/le_list.h index 09afd62..bd4bf26 100644 --- a/examples/le_list.h +++ b/examples/le_list.h @@ -51,7 +51,7 @@ class le_list{ // create n empty le-lists, each of size capacity A = tabulate(n, [=] (vertex i) { return tabulate(capacity, [] (int j){ - return std::make_pair((vertex) 0, max_distance);});}); + return std::make_pair((vertex) 0, max_distance);});}); // initial sizes are 0 sizes = tabulate>(n, [] (vertex i) {return 0;}); } @@ -68,7 +68,7 @@ class le_list{ // lists is unspecified. 
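
[Editor's note] The le_list container patched here is, at its core, a set of fixed-capacity per-vertex logs with atomic size counters, so concurrent inserts into the same list do not race. A rough standalone sketch of that pattern is below; the names and std types are hypothetical and this is not the class's actual implementation, which uses parlay sequences and assumes the capacity bound is respected by the caller. The pack method of the real class continues below.

#include <atomic>
#include <cstddef>
#include <utility>
#include <vector>

// Append-only per-slot log with a fixed capacity and an atomic cursor.
struct append_log {
  std::vector<std::vector<std::pair<int,int>>> entries;  // (vertex, distance)
  std::vector<std::atomic<int>> sizes;
  append_log(std::size_t n, std::size_t capacity)
    : entries(n, std::vector<std::pair<int,int>>(capacity)), sizes(n) {
    for (auto& s : sizes) s = 0;
  }
  // claim a slot atomically, then write into it without contention;
  // assumes the capacity is never exceeded
  void insert(std::size_t i, int vertex, int dist) {
    int pos = sizes[i].fetch_add(1);
    entries[i][pos] = {vertex, dist};
  }
};
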
sequence>> pack() { return tabulate(sizes.size(), [&] (int i){ - return to_sequence(A[i].head(sizes[i]));}); + return to_sequence(A[i].head(sizes[i]));}); } }; @@ -81,11 +81,11 @@ struct vertex_info{ template auto truncated_bfs(graph& G, graph& GT, - sequence& srcs, - sequence& order, - sequence& delta_ro, - sequence>& delta, - le_list& L){ + sequence& srcs, + sequence& order, + sequence& delta_ro, + sequence>& delta, + le_list& L){ vertex n = G.size(); auto vtxs = sequence(n); distance dist = 0; @@ -102,21 +102,21 @@ auto truncated_bfs(graph& G, graph& GT, // function to apply on each edge s->d auto edge_f = [&] (vertex s, vertex d) -> bool { if ((vtxs[d].root_ro == Empty - || order[vtxs[s].root_ro] < order[vtxs[d].root_ro]) - && (dist < delta_ro[d])) { + || order[vtxs[s].root_ro] < order[vtxs[d].root_ro]) + && (dist < delta_ro[d])) { // if d is unvisited or root priority of d is larger than s, // and distance to d is greater than current distance, // then try to update the root of d auto less = [&] (vertex newv, vertex old) { - return (old == Empty || (order[newv] < order[old]));}; + return (old == Empty || (order[newv] < order[old]));}; if (write_min(&vtxs[d].root, vtxs[s].root_ro, less)) { - L.insert(d, vtxs[s].root_ro, dist); - // update nearest distance to d - write_min(&delta[d], dist, std::less()); - // only true if first to update d (avoids duplicates in frontier) - distance old = vtxs[d].step; - return (dist > old) && vtxs[d].step.compare_exchange_strong(old, dist); + L.insert(d, vtxs[s].root_ro, dist); + // update nearest distance to d + write_min(&delta[d], dist, std::less()); + // only true if first to update d (avoids duplicates in frontier) + distance old = vtxs[d].step; + return (dist > old) && vtxs[d].step.compare_exchange_strong(old, dist); } return false; } @@ -128,7 +128,7 @@ auto truncated_bfs(graph& G, graph& GT, // in this chunk by the highest priority in the chunk. auto cond_f = [&] (vertex d) { return (dist < delta_ro[d] && - (vtxs[d].root == Empty || order[vtxs[d].root] != start)); + (vtxs[d].root == Empty || order[vtxs[d].root] != start)); }; auto frontier_map = ligra::edge_map(G, GT, edge_f, cond_f); @@ -156,7 +156,7 @@ auto create_le_list(Graph& G, Graph& GT, sequence& order) { // initial distances of "infinity" auto delta = tabulate>(n, [] (vertex i) { - return max_distance;}); + return max_distance;}); // BFS using prefix doubling, i.e. 
increasing prefixes of doubling size for(vertex r = 0; r < n; r = 2*r + 1){ @@ -171,11 +171,11 @@ auto create_le_list(Graph& G, Graph& GT, sequence& order) { parallel_for(0, n, [&] (vertex i){ //Sort lists in order of increasing vertex number (based on permutation) sort_inplace(lists[i], [&] (auto p1, auto p2) { - return inv_order[p1.first] < inv_order[p2.first];}); + return inv_order[p1.first] < inv_order[p2.first];}); //Prune lists for "extra elements" auto flags = tabulate(lists[i].size(), [&lists, i] (int j){ - return j==0 || lists[i][j].second < lists[i][j-1].second;}); + return j==0 || lists[i][j].second < lists[i][j-1].second;}); lists[i] = pack(lists[i],flags); }); return lists; diff --git a/examples/lubys.cpp b/examples/lubys.cpp index 19ed4ad..d1fa62e 100644 --- a/examples/lubys.cpp +++ b/examples/lubys.cpp @@ -34,19 +34,22 @@ int main(int argc, char* argv[]) { parlay::internal::timer t("Time"); parlay::sequence in_set; for (int i=0; i < 5; i++) { - in_set = MIS(G); + in_set = luby_MIS(G); t.next("lubys"); } - + // check whether an mis bool bad_mis = parlay::any_of(parlay::iota(n), [&] (vertex u) { - return ((in_set[u] && parlay::any_of(G[u], [&] (vertex v) {return in_set[v];})) || - (!in_set[u] && parlay::none_of(G[u], [&] (vertex v) {return in_set[v];})));}); + return ((in_set[u] && parlay::any_of(G[u], [&] (vertex v) { + return in_set[v];}))|| + (!in_set[u] && parlay::none_of(G[u], [&] (vertex v) { + return in_set[v];})));}); if (bad_mis > 0) std::cout << "not a maximal independent set" << std::endl; - - int num_in_set = parlay::reduce(parlay::map(in_set,[] (bool a) {return a ? 1 : 0;})); + + int num_in_set = parlay::reduce(parlay::map(in_set,[] (bool a) { + return a ? 1 : 0;})); std::cout << "number in set: " << num_in_set << std::endl; } } diff --git a/examples/lubys.h b/examples/lubys.h index cd60239..5a6c0d9 100644 --- a/examples/lubys.h +++ b/examples/lubys.h @@ -3,9 +3,7 @@ #include #include -#include #include -#include // ************************************************************** // Luby's algorithm for Maximal Independent Set (MIS). 
From: @@ -18,7 +16,7 @@ // Note graph is copied since it is mutated internally template -parlay::sequence MIS(const graph& G_in) { +parlay::sequence luby_MIS(const graph& G_in) { using vertex = typename graph::value_type::value_type; long n = G_in.size(); parlay::random_generator gen(0); @@ -28,10 +26,11 @@ parlay::sequence MIS(const graph& G_in) { const graph* G = &G_in; graph G_nxt(n); - // Each vertex is either in the set, out of the set, or unknown (initially) + // Each vertex is either in the set, out of the set, or unknown + // (initially) enum state : char {InSet, OutSet, Unknown}; auto states = parlay::tabulate>(n, [&] (long i) { - return Unknown;}); + return Unknown;}); // initial active vetices (all of them) auto V = parlay::tabulate(n, [] (vertex i) {return i;}); @@ -43,18 +42,16 @@ parlay::sequence MIS(const graph& G_in) { while (V.size() > 0) { // pick random priorities for the remaining vertices - for_each(V, [&] (vertex u) {auto r = gen[u]; priority[u] = dis(r);}); + for_each(V, [&] (vertex u) { + auto r = gen[u]; priority[u] = dis(r);}); // for each remaining vertex, if it has a local max priority then // add it to the MIS and remove neighbors for_each(V, [&] (vertex u) { - // if (priority[u] > - // reduce(parlay::delayed::map((*G)[u], [&] (vertex v) {return priority[v];}), - // parlay::maxm())) { if (parlay::find_if((*G)[u], [&] (vertex v) { - return priority[v] >= priority[u];}) == (*G)[u].end()) { - states[u] = InSet; - for_each((*G)[u], [&] (vertex v) { + return priority[v] >= priority[u];}) == (*G)[u].end()) { + states[u] = InSet; + for_each((*G)[u], [&] (vertex v) { if (states[v] == Unknown) states[v] = OutSet;}); }}); @@ -64,12 +61,13 @@ parlay::sequence MIS(const graph& G_in) { // only keep edges for which both endpoints are unknown for_each(V, [&] (vertex u) { G_nxt[u] = parlay::filter((*G)[u], [&] (vertex v) { - return states[v] == Unknown;});}); + return states[v] == Unknown;});}); // advanced state of random number generator gen = gen[n]; - G = &G_nxt; + G = &G_nxt; // only needed on first iteration } + // convert to booleans indicating if in the MIS return parlay::map(states, [] (auto& s) {return s.load() == InSet;}); } diff --git a/examples/minimum_edit_distance.h b/examples/minimum_edit_distance.h index cbf41af..1de21dc 100644 --- a/examples/minimum_edit_distance.h +++ b/examples/minimum_edit_distance.h @@ -35,7 +35,7 @@ long minimum_edit_distance(const Seq& s1, const Seq& s2) { int* col_in, int* col_out, auto s2, int m, int diag) { parlay::sequence row(n); - for (int i=0; i < n; i++) row[i] = row_in[i]; + for (int i=0; i < n; i++) row[i] = row_in[i]; for (int j=0; j < m; j++) { int prev = col_in[j]; for (int i=0; i < n; i++) { diff --git a/examples/radix_tree.h b/examples/radix_tree.h index e24fdef..a3dc658 100644 --- a/examples/radix_tree.h +++ b/examples/radix_tree.h @@ -64,7 +64,7 @@ struct radix_tree { // the same LCP value. The following identifies the roots of such // subtrees, and labels each with an index. 
auto root_flags = parlay::tabulate(n - 1, [&] (index i) { - return (i == Parents[i] || lcps[i] != lcps[Parents[i]]);}); + return (i == Parents[i] || lcps[i] != lcps[Parents[i]]);}); auto root_locs = parlay::pack_index(root_flags); index num_roots = root_locs.size(); parlay::sequence root_ids(n-1); @@ -73,36 +73,36 @@ struct radix_tree { // find root of subtree for location i auto cluster_root = [&] (index i) { while (i != Parents[i] && lcps[i] == lcps[Parents[i]]) - i = Parents[i]; + i = Parents[i]; return i;}; // overall root root = root_ids[*parlay::find_if(root_ids, [&] (long i) { - return Parents[i] == i;})]; + return Parents[i] == i;})]; // For each root of a subtree return a pair of the index of its // parent subtree root node, and its index*2 + 1. auto internal = parlay::delayed_tabulate(num_roots, [&] (index i) { - if (i == root) return std::pair{num_roots,2*i+1}; - index j = root_locs[i]; - index parent = root_ids[cluster_root(Parents[j])]; - return std::pair{parent, 2 * i + 1};}); + if (i == root) return std::pair{num_roots,2*i+1}; + index j = root_locs[i]; + index parent = root_ids[cluster_root(Parents[j])]; + return std::pair{parent, 2 * i + 1};}); // For each leaf (one per input string) return a pair // of the index of its parent subtree root node, and 2* its index. auto leaves = parlay::delayed_tabulate(n, [&] (index i) { - // for each element of SA find larger LCP on either side - // the root of its subtree will be its parent in the radix tree - index parent = (i == 0) ? 0 : ((i==n-1) ? i-1 : (lcps[i-1] > lcps[i] ? i-1 : i)); - parent = root_ids[cluster_root(parent)]; - return std::pair{parent, 2*i}; }); + // for each element of SA find larger LCP on either side + // the root of its subtree will be its parent in the radix tree + index parent = (i == 0) ? 0 : ((i==n-1) ? i-1 : (lcps[i-1] > lcps[i] ? i-1 : i)); + parent = root_ids[cluster_root(parent)]; + return std::pair{parent, 2*i}; }); // generate a mapping from node indices to indices of their children auto groups = parlay::group_by_index(parlay::append(internal, leaves), num_roots+1); // Create nodes. 
tree = parlay::tabulate(num_roots, [&] (index i) { - index j = root_locs[i]; - return node{lcps[j], j, std::move(groups[i])};}); + index j = root_locs[i]; + return node{lcps[j], j, std::move(groups[i])};}); } radix_tree() {} diff --git a/examples/range_min.h b/examples/range_min.h index 9c74034..be351c9 100644 --- a/examples/range_min.h +++ b/examples/range_min.h @@ -18,7 +18,7 @@ // Query takes O(block_size) time // ************************************************************** template , - typename Uint=unsigned int> + typename Uint=unsigned int> class range_min { public: range_min(Seq &a, Less&& less = {}, long block_size=32) diff --git a/examples/ray_trace.h b/examples/ray_trace.h index a2d7839..989e1b0 100644 --- a/examples/ray_trace.h +++ b/examples/ray_trace.h @@ -32,8 +32,8 @@ struct vect3d { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2];} vect3d cross(vect3d b) const { return vect3d(a[1]*b[2] - a[2]*b[1], - a[2]*b[0] - a[0]*b[2], - a[0]*b[1] - a[1]*b[0]); } + a[2]*b[0] - a[0]*b[2], + a[0]*b[1] - a[1]*b[0]); } coord& operator[](int i) {return a[i];}; vect3d() : a({0.0,0.0,0.0}) {} vect3d(coord x, coord y, coord z) : a({x,y,z}) {} @@ -54,8 +54,8 @@ range get_range(coord c0, coord c1, coord c2) { inline bool in_box(point3d p, Bounding_Box B) { return (p[0] >= (B[0][0] - epsilon) && p[0] <= (B[0][1] + epsilon) && - p[1] >= (B[1][0] - epsilon) && p[1] <= (B[1][1] + epsilon) && - p[2] >= (B[2][0] - epsilon) && p[2] <= (B[2][1] + epsilon)); + p[1] >= (B[1][0] - epsilon) && p[1] <= (B[1][1] + epsilon) && + p[2] >= (B[2][0] - epsilon) && p[2] <= (B[2][1] + epsilon)); } // Code for ray_triangle_intersect is based on: @@ -98,9 +98,9 @@ coord ray_triangle_intersect(ray r, triangle tri) { // The triangles are given by n indices I into the triangle array Tri. 
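
[Editor's note] The find_ray comment continues below. For background, ray_triangle_intersect in this file cites an external source; the standard Moller-Trumbore formulation of the ray/triangle test looks like the generic self-contained sketch below (its own small vector type, not the file's vect3d/coord types, and not necessarily identical in detail to the file's version).

#include <array>
#include <cmath>
#include <optional>

using vec3 = std::array<double,3>;
vec3 sub(vec3 a, vec3 b) { return {a[0]-b[0], a[1]-b[1], a[2]-b[2]}; }
vec3 cross(vec3 a, vec3 b) {
  return {a[1]*b[2]-a[2]*b[1], a[2]*b[0]-a[0]*b[2], a[0]*b[1]-a[1]*b[0]}; }
double dot(vec3 a, vec3 b) { return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]; }

// Distance t along the ray (origin o, direction d) to triangle (p0,p1,p2),
// or std::nullopt if there is no hit.
std::optional<double> ray_tri(vec3 o, vec3 d, vec3 p0, vec3 p1, vec3 p2) {
  const double eps = 1e-9;
  vec3 e1 = sub(p1, p0), e2 = sub(p2, p0);
  vec3 pv = cross(d, e2);
  double det = dot(e1, pv);
  if (std::fabs(det) < eps) return std::nullopt;  // ray parallel to triangle
  double inv = 1.0 / det;
  vec3 tv = sub(o, p0);
  double u = dot(tv, pv) * inv;                   // first barycentric coord
  if (u < 0 || u > 1) return std::nullopt;
  vec3 qv = cross(tv, e1);
  double v = dot(d, qv) * inv;                    // second barycentric coord
  if (v < 0 || u + v > 1) return std::nullopt;
  double t = dot(e2, qv) * inv;                   // distance along the ray
  if (t > eps) return t;
  return std::nullopt;
}
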
// -1 is returned if there is no intersection index_t find_ray(ray r, - parlay::sequence const &I, - Triangles const &Tri, - Bounding_Box B) { + parlay::sequence const &I, + Triangles const &Tri, + Bounding_Box B) { auto [o, d] = r; index_t n = I.size(); coord tMin = std::numeric_limits::max(); @@ -166,7 +166,7 @@ parlay::sequence ray_cast(const Triangles &triangles, const Rays &rays) auto boxes = parlay::tabulate(triangles.size(), [&] (size_t i) { auto [p0,p1,p2] = triangles[i]; return Bounding_Box{get_range(p0[0],p1[0],p2[0]),get_range(p0[1],p1[1],p2[1]), - get_range(p0[2],p1[2],p2[2])};}); + get_range(p0[2],p1[2],p2[2])};}); tree_node* R = kdtree_from_boxes(boxes); diff --git a/examples/rectangle_intersection.h b/examples/rectangle_intersection.h index e7658e0..688721b 100644 --- a/examples/rectangle_intersection.h +++ b/examples/rectangle_intersection.h @@ -30,21 +30,21 @@ void process_recursive(tree_node* T, const Boxes &rectangles, pair_seq* results) if (T->is_leaf()) { // get all intersections within the leaf *results = parlay::flatten(parlay::tabulate(T->n, [&] (index_t i) { - index_t idx_a = T->box_indices[i]; - auto x = parlay::tabulate(T->n - i - 1, [&] (long j) { - index_t idx_b = T->box_indices[i+j+1]; - if (!intersect(rectangles[idx_a], rectangles[idx_b])) - return std::pair{-1,-1}; - else return std::pair{std::min(idx_a, idx_b), std::max(idx_a, idx_b)};}, 1000); - return filter(x, [] (auto p) {return p.first > -1;});}, 100)); + index_t idx_a = T->box_indices[i]; + auto x = parlay::tabulate(T->n - i - 1, [&] (long j) { + index_t idx_b = T->box_indices[i+j+1]; + if (!intersect(rectangles[idx_a], rectangles[idx_b])) + return std::pair{-1,-1}; + else return std::pair{std::min(idx_a, idx_b), std::max(idx_a, idx_b)};}, 1000); + return filter(x, [] (auto p) {return p.first > -1;});}, 100)); } else { // recurse to two subtrees parlay::par_do([&] {process_recursive(T->left, rectangles, results);}, - [&] {process_recursive(T->right, rectangles, - results + T->left->num_leaves);}); + [&] {process_recursive(T->right, rectangles, + results + T->left->num_leaves);}); } } - + auto rectangle_intersection(Boxes &rectangles) { tree_node* R = kdtree_from_boxes(rectangles); parlay::sequence results(R->num_leaves); diff --git a/examples/star_connectivity.h b/examples/star_connectivity.h index e1a2014..f6e8b6b 100644 --- a/examples/star_connectivity.h +++ b/examples/star_connectivity.h @@ -26,9 +26,9 @@ std::uniform_real_distribution rdis(0.0,1.0); template parlay::sequence star_contract(const parlay::sequence>& E, - parlay::sequence V, - parlay::sequence& parents, - rg generator) { + parlay::sequence V, + parlay::sequence& parents, + rg generator) { using edge = std::pair; // std::cout << "size : " << E.size() << ", " << V.size() << std::endl; @@ -67,7 +67,7 @@ star_contract(const parlay::sequence>& E, template std::pair, parlay::sequence> star_connectivity_simple(const parlay::sequence>& E, - long n) { + long n) { using edge = std::pair; auto parents = parlay::tabulate(n, [] (vertex i) {return i;}); auto roots = star_contract(E, parents, parents, rg(0)); @@ -82,9 +82,9 @@ star_connectivity_simple(const parlay::sequence>& E, template parlay::sequence star_contract_sample(const parlay::sequence>& E, - parlay::sequence V, - parlay::sequence& parents, - rg generator) { + parlay::sequence V, + parlay::sequence& parents, + rg generator) { using edge = std::pair; // std::cout << "size : " << E.size() << ", " << V.size() << std::endl; @@ -97,8 +97,8 @@ star_contract_sample(const parlay::sequence>& E, 
float frac = V.size() * 3.0 / E.size(); rg r = generator[0]; auto Ee = parlay::map_maybe(parlay::iota(E.size()), [&] (long i) { - rg r = generator[i]; - return rdis(r) < frac ? std::optional(E[i]) : std::optional();}); + rg r = generator[i]; + return rdis(r) < frac ? std::optional(E[i]) : std::optional();}); return star_contract_sample(Ee, V, parents, generator[E.size()]); } @@ -131,7 +131,7 @@ star_contract_sample(const parlay::sequence>& E, template std::pair, parlay::sequence> star_connectivity(const parlay::sequence>& E, - long n) { + long n) { using edge = std::pair; auto parents = parlay::tabulate(n, [] (vertex i) {return i;}); diff --git a/examples/suffix_tree.h b/examples/suffix_tree.h index ed51bb7..2c83da5 100644 --- a/examples/suffix_tree.h +++ b/examples/suffix_tree.h @@ -24,8 +24,8 @@ radix_tree suffix_tree(const Str& S) { parlay::for_each(result.tree, [&] (auto& node) { node.string_idx = SA[node.string_idx]; node.children = parlay::map(node.children, [&] (index i) { - // leaf if (i%2 == 0), otherwise internal - return (i % 2 == 0) ? 2 * SA[i/2] : i;}, 1000);}); + // leaf if (i%2 == 0), otherwise internal + return (i % 2 == 0) ? 2 * SA[i/2] : i;}, 1000);}); return result; } @@ -39,8 +39,8 @@ long find(const radix_tree& T, const Str& str, const sStr& search_str) { // check if any children match on first character for (auto child : T.get_children(current)) if (str[T.get_string(child) + depth] == search_str[depth]) { - new_node = child; - break; + new_node = child; + break; } if (new_node == current) return -1l; // none found index new_depth = T.is_leaf(new_node) ? str.size() - T.get_string(new_node) : T.get_depth(new_node);
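
[Editor's note] Several files touched in this patch (star_connectivity.h, ldd_connectivity.h, boruvka.h) compute connected components by contraction and return a parent labeling. As a point of reference for what that labeling means, here is a minimal sequential union-find baseline that produces an equivalent labeling (a sketch independent of the parlay code; names are illustrative). It is handy for checking the parallel results on small inputs.

#include <numeric>
#include <utility>
#include <vector>

// Sequential connectivity via union-find with path halving.
// Vertices in the same component end up with the same representative.
std::vector<int> connectivity_seq(int n,
                                  const std::vector<std::pair<int,int>>& edges) {
  std::vector<int> parent(n);
  std::iota(parent.begin(), parent.end(), 0);
  auto find = [&](int x) {
    while (parent[x] != x) { parent[x] = parent[parent[x]]; x = parent[x]; }
    return x;
  };
  for (auto [u, v] : edges) {
    int ru = find(u), rv = find(v);
    if (ru != rv) parent[ru] = rv;                  // union the two components
  }
  for (int v = 0; v < n; v++) parent[v] = find(v);  // flatten the labels
  return parent;
}
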