Skip to content

Commit

Permalink
fixing errors for ms compiler
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Feb 27, 2024
1 parent ed00a4e commit 1bb6759
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 22 deletions.
2 changes: 1 addition & 1 deletion examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ set(EXAMPLES "primes" "BFS" "word_counts" "tokens" "filter" "linefit"
"knuth_morris_pratt" "huffman_tree" "decision_tree_c45" "karatsuba" "suffix_tree"
"2d_linear_program" "box_kdtree" "radix_tree" "ray_trace" "hash_map" "oct_tree"
"3d_range" "rectangle_intersection" "star_connectivity" "ldd_connectivity" "boruvka"
"counting_sort")
"counting_sort" "integer_sort")

function(add_example NAME)
add_executable(${NAME} ${NAME}.cpp)
Expand Down
74 changes: 55 additions & 19 deletions examples/counting_sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,53 @@
#include <parlay/sequence.h>
#include <parlay/primitives.h>

template <typename InIt, typename OutIt, typename KeyIt>
parlay::sequence<int>
counting_sort(const InIt& begin, const InIt& end,
OutIt out, const KeyIt& keys,
// **************************************************************
// Counting sort
// A parallel version of counting sort. It breaks the input into
// partitions and for each partition, in parallel, it counts how many
// of each key there are. It then uses a scan to calculate the offsets
// for each bucket in each partition, and does a final pass placing
// all keys in their correct position.
// **************************************************************

using counter_type = unsigned long;

// **************************************************************
// Input:
// begin and end iterators for the values to be rearranged
// begin iterator for the output (value_type must match the input's)
// begin iterator for the keys (range must be same length as values)
// num_buckets : number of buckets (should be smallish, e.g. 256)
// Output:
// Offsets within output of each key. Will be of length
// num_buckets+1 since last entry will contain total size
// (i.e. end-begin).
// **************************************************************
template <typename InIt, typename OutIt, typename KeyIt>
parlay::sequence<counter_type>
counting_sort(const InIt& begin, const InIt& end,
OutIt out, const KeyIt& keys,
long num_buckets) {
long n = end - begin;
long num_parts = n / (num_buckets * 64) + 1;
if (n == 0) return parlay::sequence<counter_type>(1, 0);
long num_parts = std::min(1000l, n / (num_buckets * 64) + 1);
long part_size = (n - 1)/num_parts + 1;

// first count buckets within each partition
auto counts = parlay::sequence<int>::uninitialized(num_buckets * num_parts);
auto counts = parlay::sequence<counter_type>::uninitialized(num_buckets * num_parts);
parlay::parallel_for(0, num_parts, [&] (long i) {
long start = i * part_size;
long end = std::min<long>(start + part_size, n);
for (int j = 0; j < num_buckets; j++) counts[i*num_buckets + j] = 0;
for (size_t j = start; j < end; j++) counts[i*num_buckets + keys[j]]++;
for (long j = 0; j < num_buckets; j++) counts[i*num_buckets + j] = 0;
for (long j = start; j < end; j++) counts[i*num_buckets + keys[j]]++;
}, 1);

// transpose the counts if more than one part
parlay::sequence<int> trans_counts;
if (num_parts > 1) {
trans_counts = parlay::sequence<int>::uninitialized(num_buckets * num_parts);
parlay::parallel_for(0, num_buckets, [&] (long i) {
for (size_t j = 0; j < num_parts; j++)
parlay::sequence<counter_type> trans_counts;
if (num_parts > 1) {
trans_counts = parlay::sequence<counter_type>::uninitialized(num_buckets * num_parts);
parlay::parallel_for(0, num_buckets, [&] (long i) {
for (size_t j = 0; j < num_parts; j++)
trans_counts[i* num_parts + j] = counts[j * num_buckets + i];}, 1);
} else trans_counts = std::move(counts);

Expand All @@ -39,19 +62,32 @@ counting_sort(const InIt& begin, const InIt& end,
parlay::parallel_for(0, num_parts, [&] (long i) {
long start = i * part_size;
long end = std::min<long>(start + part_size, n);
int local_offsets[num_buckets];
parlay::sequence<counter_type> local_offsets(num_buckets);

// transpose back
for (int j = 0; j < num_buckets; j++)
for (long j = 0; j < num_buckets; j++)
local_offsets[j] = trans_counts[num_parts * j + i];

// copy to output
for (size_t j = start; j < end; j++) {
int k = local_offsets[keys[j]]++;
for (long j = start; j < end; j++) {
counter_type k = local_offsets[keys[j]]++;
// prefetching speeds up the code
#if defined(__GNUC__) || defined(__clang__)
__builtin_prefetch (((char*) &out[k]) + 64);
#endif
out[k] = begin[j];
}}, 1);

return parlay::tabulate(num_buckets, [&] (long i) {
return trans_counts[i * num_parts];});
return parlay::tabulate(num_buckets+1, [&] (long i) {
return (i == num_buckets) ? (counter_type) n : trans_counts[i * num_parts];});
}

// A version that uses ranges as inputs and generates its own output sequence
template <typename InRange, typename KeysRange>
auto counting_sort(const InRange& in, const KeysRange& keys,
long num_buckets) {
auto out = parlay::sequence<typename InRange::value_type>::uninitialized(in.size());
auto offsets = counting_sort(in.begin(), in.end(), out.begin(), keys.begin(),
num_buckets);
return std::pair(std::move(out), std::move(offsets));
}
5 changes: 3 additions & 2 deletions examples/samplesort.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <parlay/utilities.h>

#include "helper/heap_tree.h"
#include "counting_sort.h"

// **************************************************************
// Sample sort
Expand All @@ -29,7 +30,7 @@ void sample_sort_(Range in, Range out, Less less, int level=1) {
long cutoff = 256;
if (n <= cutoff || level > 2) {
parlay::copy(in, out);
std::stable_sort(out.begin(), out.end());
std::sort(out.begin(), out.end());
return;
}

Expand Down Expand Up @@ -58,7 +59,7 @@ void sample_sort_(Range in, Range out, Less less, int level=1) {
return ss.find(in[i], less);});

// sort into the buckets
auto [keys,offsets] = parlay::internal::count_sort(in, bucket_ids, num_buckets);
auto [keys,offsets] = counting_sort(in, bucket_ids, num_buckets);

// now recursively sort each bucket
parlay::parallel_for(0, num_buckets, [&, &keys = keys, &offsets = offsets] (long i) {
Expand Down

0 comments on commit 1bb6759

Please sign in to comment.