diff --git a/examples/2d_linear_program.h b/examples/2d_linear_program.h index a43ee3a..d6054b5 100644 --- a/examples/2d_linear_program.h +++ b/examples/2d_linear_program.h @@ -77,7 +77,7 @@ auto linear_program_2d(const constraints& H_in, constraint c) { long top = std::min(2*i, n); // check for a violating constraint from i to top long loc = parlay::reduce(parlay::delayed_tabulate(top-i, [&] (long j) { - return violate(p, H[i+j]) ? i+j : n;}), parlay::minimum()); + return violate(p, H[i+j]) ? i+j : n;}), parlay::minimum()); if (loc == n) i = top; // no violating constraint found, repeat and double again else { // found a violating constraint at location loc @@ -85,11 +85,11 @@ auto linear_program_2d(const constraints& H_in, constraint c) { // i.e. H[loc] x c and h x c have opposite signs coord cr = cross(H[loc],c); auto Hf = parlay::filter(H.cut(0,loc), [&] (constraint h) { - return cr * cross(h,c) < 0;}); + return cr * cross(h,c) < 0;}); // find the tightest such constraint auto min = [&] (constraint a, constraint b) { - return cr * (project(H[loc], a) - project(H[loc], b)) > 0 ? a : b;}; + return cr * (project(H[loc], a) - project(H[loc], b)) > 0 ? a : b;}; constraint cx = parlay::reduce(Hf, parlay::binary_op(min,constraint{0.,0.,0.})); // update the optimal point and the index i diff --git a/examples/3d_range.h b/examples/3d_range.h index c8e42e2..f8dcc3b 100644 --- a/examples/3d_range.h +++ b/examples/3d_range.h @@ -75,8 +75,8 @@ struct search { leaf* TL = static_cast(T); for (int i = 0; i < TL->size; i++) if (TL->pts[i].id != p.id && - distance_squared(TL->pts[i].pnt) < r * r) - in_range.push_back(TL->pts[i].id); } + distance_squared(TL->pts[i].pnt) < r * r) + in_range.push_back(TL->pts[i].id); } // looks for points within range for p in subtree rooted at T. // Can return immediately if radius does not intersect the box. @@ -85,8 +85,8 @@ struct search { if (T->is_leaf) add_leaf(T); else { interior* TI = static_cast(T); - range_search_down(TI->left); - range_search_down(TI->right); + range_search_down(TI->left); + range_search_down(TI->right); } } } diff --git a/examples/BFS.h b/examples/BFS.h index 4cf3a62..675c58c 100644 --- a/examples/BFS.h +++ b/examples/BFS.h @@ -57,9 +57,9 @@ auto BFS(vertex start, const graph& G) { // keep the v that succeed in setting the visited array frontier = delayed::to_sequence(delayed::map_maybe(out, [&] (vertex v) { - bool expected = false; + bool expected = false; if ((!visited[v]) && visited[v].compare_exchange_strong(expected, true)) - return std::optional(v); + return std::optional(v); return std::optional();})); } diff --git a/examples/bigint_add.h b/examples/bigint_add.h index ae1ffb4..e4b3262 100644 --- a/examples/bigint_add.h +++ b/examples/bigint_add.h @@ -59,20 +59,20 @@ bigint add(const Bigint1& a, const Bigint2& b, bool extra_one=false) { } else { // do in parallel // check which digits will carry or propagate auto c = delayed::tabulate(na, [&] (long i) { - double_digit s = a[i] + static_cast(B(i)); - s += (i == 0 && extra_one); - return static_cast(2 * (s == mask) + (s >> digit_len));}); + double_digit s = a[i] + static_cast(B(i)); + s += (i == 0 && extra_one); + return static_cast(2 * (s == mask) + (s >> digit_len));}); // use scan to do the propagation auto f = [] (carry a, carry b) {return (b == propagate) ?
a : b;}; auto cc = delayed::scan(c, parlay::binary_op(f, propagate)).first; auto z = delayed::zip(cc, parlay::iota(na)); result = delayed::to_sequence(delayed::map(z, [&] (auto p) { - auto [ci, i] = p; - return a[i] + B(i) + ci;})); + auto [ci, i] = p; + return a[i] + B(i) + ci;})); //auto cc = parlay::scan(c, parlay::binary_op(f, propagate)).first; //result = parlay::tabulate(na, [&] (long i) { - // return a[i] + B(i) + cc[i];}); + // return a[i] + B(i) + cc[i];}); } if ((a_sign == b_sign) && ((result[na-1] >> (digit_len - 1)) != a_sign)) result.push_back(a_sign ? mask : 1u); @@ -83,5 +83,5 @@ template bigint subtract(const Bigint1& a, const Bigint2& b) { // effectively negate b and add return add(a, delayed::map(b, [] (auto bv) {return ~bv;}), - true); + true); } diff --git a/examples/boruvka.h b/examples/boruvka.h index b4b7dd1..a3b207b 100644 --- a/examples/boruvka.h +++ b/examples/boruvka.h @@ -16,9 +16,9 @@ using edge = std::pair; using w_edge = std::pair; parlay::sequence boruvka(const parlay::sequence& E, - const parlay::sequence V, - parlay::sequence>& W, - parlay::sequence& P) { + const parlay::sequence V, + parlay::sequence>& W, + parlay::sequence& P) { //std::cout << E.size() << ", " << V.size() << std::endl; if (E.size() == 0) return parlay::sequence(); @@ -36,7 +36,7 @@ parlay::sequence boruvka(const parlay::sequence& E, return (W[u] == e.second || W[v] == e.second);}); auto V_new = star_contract(map(Es, [] (w_edge e) {return e.first;}), - V, P, parlay::random_generator(0)); + V, P, parlay::random_generator(0)); // update edges to new endpoints and filter out self edges auto ES = parlay::delayed::map(E, [&] (w_edge e) { diff --git a/examples/box_kdtree.h b/examples/box_kdtree.h index b92478a..7f837b2 100644 --- a/examples/box_kdtree.h +++ b/examples/box_kdtree.h @@ -74,7 +74,7 @@ struct tree_node { bool is_leaf() {return left == nullptr;} tree_node(tree_node* L, tree_node* R, - int cut_dim, float cut_off, Bounding_Box B) + int cut_dim, float cut_off, Bounding_Box B) : left(L), right(R), cut_dim(cut_dim), cut_off(cut_off), box(B) { n = L->n + R->n; @@ -99,9 +99,9 @@ struct tree_node { if (!is_leaf()) { if (left == nullptr || right == nullptr) abort(); parlay::par_do_if(n > 1000, - [&] () { node_allocator.retire(left);}, - [&] () { node_allocator.retire(right);} - );}; + [&] () { node_allocator.retire(left);}, + [&] () { node_allocator.retire(right);} + );}; } }; @@ -158,8 +158,8 @@ cut_info best_cut(parlay::sequence const &E, range r, range r1, range r2) using events_pair = std::pair, parlay::sequence>; events_pair split_events(const parlay::sequence &box_ranges, - const parlay::sequence &events, - float cut_off) { + const parlay::sequence &events, + float cut_off) { index_t n = events.size(); auto lower = parlay::sequence::uninitialized(n); auto upper = parlay::sequence::uninitialized(n); @@ -173,9 +173,9 @@ events_pair split_events(const parlay::sequence &box_ranges, // n is the number of events (i.e. 
twice the number of triangles) tree_node* generate_node(Ranges &boxes, - Events events, - Bounding_Box B, - size_t maxDepth) { + Events events, + Bounding_Box B, + size_t maxDepth) { index_t n = events[0].size(); if (n <= 2 || maxDepth == 0) return tree_node::node_allocator.allocate(std::move(events), n, B); @@ -218,9 +218,9 @@ tree_node* generate_node(Ranges &boxes, for (int i=0; i < 3; i++) events[i] = parlay::sequence(); tree_node *L, *R; parlay::par_do([&] () {L = generate_node(boxes, std::move(left_events), - BBL, maxDepth-1);}, - [&] () {R = generate_node(boxes, std::move(right_events), - BBR, maxDepth-1);}); + BBL, maxDepth-1);}, + [&] () {R = generate_node(boxes, std::move(right_events), + BBR, maxDepth-1);}); return tree_node::node_allocator.allocate(L, R, cut_dim, cut_off, B); } @@ -236,9 +236,9 @@ auto kdtree_from_boxes(Boxes& boxes) { events[d] = parlay::sequence(2*n); ranges[d] = parlay::sequence(n); parlay::parallel_for(0, n, [&] (size_t i) { - events[d][2*i] = event(boxes[i][d][0], i, false); - events[d][2*i+1] = event(boxes[i][d][1], i, true); - ranges[d][i] = boxes[i][d];}); + events[d][2*i] = event(boxes[i][d][0], i, false); + events[d][2*i+1] = event(boxes[i][d][1], i, true); + ranges[d][i] = boxes[i][d];}); parlay::sort_inplace(events[d], [] (event a, event b) {return a.v < b.v;}); boundingBox[d] = range{events[d][0].v, events[d][2*n-1].v}; } diff --git a/examples/counting_sort.cpp b/examples/counting_sort.cpp index c96bacb..9172c1f 100644 --- a/examples/counting_sort.cpp +++ b/examples/counting_sort.cpp @@ -30,21 +30,23 @@ int main(int argc, char* argv[]) { auto data = parlay::tabulate(n, [&] (long i) { auto r = gen[i]; return dis(r);}); - + parlay::internal::timer t("Time"); parlay::sequence result(n); for (int i=0; i < 5; i++) { t.start(); counting_sort(data.begin(), data.end(), - result.begin(), - data.begin(), - num_buckets); + result.begin(), + data.begin(), + num_buckets); t.next("counting_sort"); } auto first_ten = result.head(10); auto last_ten = result.tail(10); - std::cout << "first 10 elements: " << parlay::to_chars(first_ten) << std::endl; - std::cout << "last 10 elements: " << parlay::to_chars(last_ten) << std::endl; + std::cout << "first 10 elements: " << parlay::to_chars(first_ten) + << std::endl; + std::cout << "last 10 elements: " << parlay::to_chars(last_ten) + << std::endl; } } diff --git a/examples/counting_sort.h b/examples/counting_sort.h index b8c63f6..fd880b9 100644 --- a/examples/counting_sort.h +++ b/examples/counting_sort.h @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -13,10 +14,20 @@ // of each key there are. It then uses scan to calculate the offsets // for each bucket in each partition, and does a final pass placing // all keys in their correct position.
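
[Editor's note] To make the count/scan/place structure described in the counting_sort.h comment above concrete, here is a minimal sequential sketch in standard C++. The function name and types are illustrative, not the parlay version being patched; the parallel version additionally splits the input into partitions and keeps one counter array per partition, which is what the transpose and scan in the patched code are for.

#include <cstddef>
#include <vector>

// Sequential counting sort: keys[i] in [0, num_buckets) determines
// where in[i] goes.  Returns bucket offsets (size num_buckets + 1).
std::vector<std::size_t> counting_sort_seq(const std::vector<int>& in,
                                           const std::vector<int>& keys,
                                           std::vector<int>& out,
                                           int num_buckets) {
  std::vector<std::size_t> counts(num_buckets + 1, 0);
  // 1. count how many of each key there are
  for (int k : keys) counts[k + 1]++;
  // 2. an exclusive scan (prefix sum) turns counts into bucket offsets
  for (int b = 0; b < num_buckets; b++) counts[b + 1] += counts[b];
  // 3. final pass places each element at the next free slot of its bucket
  std::vector<std::size_t> next(counts.begin(), counts.end() - 1);
  out.resize(in.size());
  for (std::size_t i = 0; i < in.size(); i++) out[next[keys[i]]++] = in[i];
  return counts;   // counts[b] .. counts[b+1] delimit bucket b in out
}
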
+// +// For input of size n, and for m buckets +// Work = O(n) +// Span = O(m + n / m) // ************************************************************** using counter_type = unsigned long; +void prefetch(void* l) { +#if defined(__GNUC__) || defined(__clang__) + __builtin_prefetch (l); +#endif +} + // ************************************************************** // Input: // begin and end iterators for the values to be rearranged @@ -30,65 +41,59 @@ using counter_type = unsigned long; // ************************************************************** template parlay::sequence -counting_sort(const InIt& begin, const InIt& end, +counting_sort(const InIt& begin, const InIt& end, OutIt out, const KeyIt& keys, long num_buckets) { - long n = end - begin; + counter_type n = end - begin; if (n == 0) return parlay::sequence(1, 0); - long num_parts = std::min(1000l, n / (num_buckets * 64) + 1); - long part_size = (n - 1)/num_parts + 1; + long num_parts = std::min(1000l, n / (num_buckets * 64) + 1); + counter_type part_n = (n - 1)/num_parts + 1; + long m = num_buckets * num_parts; // first count buckets within each partition - auto counts = parlay::sequence::uninitialized(num_buckets * num_parts); + auto counts = parlay::sequence::uninitialized(m); parlay::parallel_for(0, num_parts, [&] (long i) { - long start = i * part_size; - long end = std::min(start + part_size, n); - for (long j = 0; j < num_buckets; j++) counts[i*num_buckets + j] = 0; - for (long j = start; j < end; j++) counts[i*num_buckets + keys[j]]++; + std::vector local_counts(num_buckets); + for (long j = 0; j < num_buckets; j++) + local_counts[j] = 0; + for (long j = i * part_n; j < std::min((i+1) * part_n, n); j++) + local_counts[keys[j]]++; + // transpose so equal buckets are adjacent + for (long j = 0; j < num_buckets; j++) + counts[num_parts * j + i] = local_counts[j]; }, 1); - // transpose the counts if more than one part - parlay::sequence trans_counts; - if (num_parts > 1) { - trans_counts = parlay::sequence::uninitialized(num_buckets * num_parts); - parlay::parallel_for(0, num_buckets, [&] (long i) { - for (size_t j = 0; j < num_parts; j++) - trans_counts[i* num_parts + j] = counts[j * num_buckets + i];}, 1); - } else trans_counts = std::move(counts); - // scan for offsets for all buckets - parlay::scan_inplace(trans_counts); + parlay::scan_inplace(counts); // go back over partitions to place in final location parlay::parallel_for(0, num_parts, [&] (long i) { - long start = i * part_size; - long end = std::min(start + part_size, n); - parlay::sequence local_offsets(num_buckets); - - // transpose back + std::vector local_offsets(num_buckets); + // transpose back into local offsets for (long j = 0; j < num_buckets; j++) - local_offsets[j] = trans_counts[num_parts * j + i]; - + local_offsets[j] = counts[num_parts * j + i]; // copy to output - for (long j = start; j < end; j++) { + for (long j = i * part_n; j < std::min((i+1) * part_n, n); j++) { counter_type k = local_offsets[keys[j]]++; - // prefetching speeds up the code - #if defined(__GNUC__) || defined(__clang__) - __builtin_prefetch (((char*) &out[k]) + 64); - #endif + prefetch(((char*) &out[k]) + 64); out[k] = begin[j]; }}, 1); - return parlay::tabulate(num_buckets+1, [&] (long i) { - return (i == num_buckets) ? (counter_type) n : trans_counts[i * num_parts];}); + // return offsets for each bucket. + // includes extra element at end containing n + return parlay::tabulate(num_buckets + 1, [&] (long i) { + return (i == num_buckets) ? 
n : counts[i * num_parts];}); } -// A version that uses ranges as inputs and generates its own output sequence +// A wrapper that uses ranges as inputs and generates its own output +// sequence template auto counting_sort(const InRange& in, const KeysRange& keys, - long num_buckets) { - auto out = parlay::sequence::uninitialized(in.size()); - auto offsets = counting_sort(in.begin(), in.end(), out.begin(), keys.begin(), - num_buckets); + long num_buckets) { + using V = typename InRange::value_type; + auto out = parlay::sequence::uninitialized(in.size()); + auto offsets = counting_sort(in.begin(), in.end(), + out.begin(), keys.begin(), + num_buckets); return std::pair(std::move(out), std::move(offsets)); } diff --git a/examples/fast_fourier_transform.h b/examples/fast_fourier_transform.h index f071cce..bbba27f 100644 --- a/examples/fast_fourier_transform.h +++ b/examples/fast_fourier_transform.h @@ -24,13 +24,13 @@ void fft_recursive(long n, long s, T const* in, T* out, T const* w) { out[1] = in[0] - in[s]; } else { parlay::par_do_if(n >= 256, - [&] {fft_recursive(n/2, 2*s, in, out, w);}, - [&] {fft_recursive(n/2, 2*s, in+s, out+n/2, w);}); + [&] {fft_recursive(n/2, 2*s, in, out, w);}, + [&] {fft_recursive(n/2, 2*s, in+s, out+n/2, w);}); parlay::parallel_for(0, n/2, [&] (long i) { - T p = out[i]; - T q = out[i+n/2] * w[i * s]; - out[i] = p + q; - out[i+n/2] = p - q;}, 1000); + T p = out[i]; + T q = out[i+n/2] * w[i * s]; + out[i] = p + q; + out[i+n/2] = p - q;}, 1000); } } @@ -72,16 +72,16 @@ auto complex_fft(const parlay::sequence& a) { // loop-based fft (assumes A is in bit-reveresed order) template auto fft_base(parlay::sequence& A, - const parlay::sequence& w) { + const parlay::sequence& w) { long n = A.size(); int is = n/2; for (int s = 1; s < n; s = s*2, is = is/2) for (int i = 0; i < n; i += 2*s) for (int j = 0; j < s; j++) { - T p = A[i + j]; - T q = A[i + j + s] * w[j*is]; - A[i + j] = p + q; - A[i + j + s] = p - q; + T p = A[i + j]; + T q = A[i + j + s] * w[j*is]; + A[i + j] = p + q; + A[i + j + s] = p - q; } } @@ -103,22 +103,22 @@ auto fft_transpose(const parlay::sequence>& cols, T nth_root parlay::sequence wc = powers(std::pow(nth_root, num_cols), num_rows); auto columns = parlay::tabulate(num_cols, [&] (long i) { auto c = parlay::tabulate(num_rows, [&] (long j) { - return cols[i][br[j]];}); + return cols[i][br[j]];}); fft_base(c, wc); auto w = powers(std::pow(nth_root, i), num_rows); // rotate to avoid stride problems with set-associative cache return parlay::tabulate(num_rows, [&] (long j) { - int k = (num_rows + j - i) & (num_rows - 1); - return c[k]*w[k];});}); + int k = (num_rows + j - i) & (num_rows - 1); + return c[k]*w[k];});}); // transpose and process rows br = bit_reverse(num_cols); auto wr = powers(std::pow(nth_root, num_rows), num_cols); return parlay::tabulate(num_rows, [&] (long j) { auto row = parlay::tabulate(num_cols, [&] (long i) { - return columns[i][i+j & (num_rows - 1)];}); // transpose + return columns[i][i+j & (num_rows - 1)];}); // transpose auto r = parlay::tabulate(num_cols, [&] (long i) { - return row[br[i]];}); + return row[br[i]];}); fft_base(r, wr); return r;}); } diff --git a/examples/hash_map.h b/examples/hash_map.h index 7ffc170..f63556a 100644 --- a/examples/hash_map.h +++ b/examples/hash_map.h @@ -23,9 +23,9 @@ // ************************************************************** template , - typename Equal = std::equal_to<>> + typename V, + typename Hash = parlay::hash, + typename Equal = std::equal_to<>> struct hash_map { private: using KV = 
std::pair; @@ -57,27 +57,27 @@ struct hash_map { while (true) { state status = H[i].status; if (status == full || status == tomb) { - if (H[i].key == k) { - if (status == full) return false; - else if (H[i].status.compare_exchange_strong(status, locked)) { - H[i].value = v; - H[i].status = full; - return true; - } - } else { - if (count++ == std::min(1000l,m)) { - std::cout << "Hash table overfull" << std::endl; - return false; - } - i = next_index(i); - } + if (H[i].key == k) { + if (status == full) return false; + else if (H[i].status.compare_exchange_strong(status, locked)) { + H[i].value = v; + H[i].status = full; + return true; + } + } else { + if (count++ == std::min(1000l,m)) { + std::cout << "Hash table overfull" << std::endl; + return false; + } + i = next_index(i); + } } else if (status == empty) { - if (H[i].status.compare_exchange_strong(status, locked)) { - H[i].key = k; - H[i].value = v; - H[i].status = full; - return true; - } + if (H[i].status.compare_exchange_strong(status, locked)) { + H[i].key = k; + H[i].value = v; + H[i].status = full; + return true; + } } } } @@ -96,11 +96,11 @@ struct hash_map { while (true) { if (H[i].status == empty || H[i].status == locked) return {}; if (H[i].key == k) { - state old = full; - if (H[i].status == full && - H[i].status.compare_exchange_strong(old, tomb)) - return std::move(H[i].value); - else return {}; + state old = full; + if (H[i].status == full && + H[i].status.compare_exchange_strong(old, tomb)) + return std::move(H[i].value); + else return {}; } i = next_index(i); } @@ -108,12 +108,12 @@ struct hash_map { parlay::sequence keys() { return parlay::map_maybe(H, [] (const entry &x) { - return (x.status == full) ? std::optional{x.key} : std::optional{};}); + return (x.status == full) ? std::optional{x.key} : std::optional{};}); } size_t size() { return parlay::reduce(parlay::delayed_map(H, [&] (auto const &x) -> long { - return x.status == full;})); + return x.status == full;})); } }; diff --git a/examples/integer_sort.cpp b/examples/integer_sort.cpp index 680dea3..947c884 100644 --- a/examples/integer_sort.cpp +++ b/examples/integer_sort.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -23,10 +22,10 @@ int main(int argc, char* argv[]) { catch (...) 
{ std::cout << usage << std::endl; return 1; } using int_type = unsigned int; - + parlay::random_generator gen; std::uniform_int_distribution dis(0, n-1); - int bits = std::ceil(std::log2(n)); + int num_bits = std::ceil(std::log2(n)); // generate random int_type values between 0 and n parlay::sequence data = parlay::tabulate(n, [&] (long i) { @@ -38,11 +37,12 @@ int main(int argc, char* argv[]) { for (int i = 0; i < 5; i++) { result = data; t.start(); - ::integer_sort(result, bits); + ::integer_sort(result, num_bits); t.next("integer_sort"); } auto first_ten = result.head(10); - std::cout << "first 10 elements: " << parlay::to_chars(first_ten) << std::endl; + std::cout << "first 10 elements: " << parlay::to_chars(first_ten) + << std::endl; } } diff --git a/examples/integer_sort.h b/examples/integer_sort.h index bba0a74..2d24005 100644 --- a/examples/integer_sort.h +++ b/examples/integer_sort.h @@ -25,18 +25,21 @@ // A bottom up radix sort with 8-bits per round template -void bottom_up_radix_sort(Range in, Range out, long bits, long bot_bit, bool inplace) { +void bottom_up_radix_sort(Range in, Range out, + long num_bits, long bot_bit, bool inplace) { const long radix_bits = 8; - if (bot_bit >= bits) { + if (bot_bit >= num_bits) { if (!inplace) parlay::copy(in, out); } else { - long num_buckets = std::min(1l << radix_bits, 1l << (bits - bot_bit)); + long num_buckets = std::min(1l << radix_bits, + 1l << (num_bits - bot_bit)); // the keys are bits from bot_bit to bits auto keys = parlay::delayed::tabulate(in.size(), [&] (long i) { return (in[i] >> bot_bit) & (num_buckets-1);}); counting_sort(in.begin(), in.end(), out.begin(), keys.begin(), - num_buckets); - bottom_up_radix_sort(out, in, bits, bot_bit + radix_bits, !inplace); + num_buckets); + bottom_up_radix_sort(out, in, + num_bits, bot_bit + radix_bits, !inplace); } } @@ -51,16 +54,19 @@ void radix_sort(Range in, Range out, long bits, bool inplace) { bottom_up_radix_sort(in, out, bits, 0, inplace); else { // number of bits in radix - long radix_bits = std::min(bits, std::min(10l, std::ceil(std::log2(2*n/cutoff)))); + long radix_bits = std::min({bits, + 10l, + (long) std::ceil(std::log2(2*n/cutoff))}); long num_buckets = 1l << radix_bits; // extract the needed bits for the keys auto keys = parlay::delayed::tabulate(n, [&] (long i) { return (in[i] >> (bits - radix_bits)) & (num_buckets-1);}); - - // sort in into the out based on keys - auto offsets = counting_sort(in.begin(), in.end(), out.begin(), keys.begin(), - num_buckets); + + // sort in into out based on keys + auto offsets = counting_sort(in.begin(), in.end(), + out.begin(), keys.begin(), + num_buckets); // now recursively sort each bucket parlay::parallel_for(0, num_buckets, [&] (long i) { @@ -68,16 +74,17 @@ void radix_sort(Range in, Range out, long bits, bool inplace) { long last = offsets[i+1]; // end of bucket // note that order of in and out is flipped (as is inplace) radix_sort(out.cut(first,last), in.cut(first,last), - bits - radix_bits, !inplace); + bits - radix_bits, !inplace); }, 1); } } -// An inplace (i.e., result is in the same place as the input) integer_sort -// Requires O(n) temp space +// An inplace (i.e., result is in the same place as the input) +// integer_sort. Requires O(n) temp space. 
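
[Editor's note] For reference, the bottom-up digit-by-digit strategy used by bottom_up_radix_sort above, in a self-contained sequential form (the inplace parlay wrapper continues below). Names and types here are illustrative; the parallel code also recurses over buckets in parallel in the top-down case. Stability of the per-digit pass is what makes the round-by-round approach correct, and num_bits can be taken as std::ceil(std::log2(n)) for values below n, matching integer_sort.cpp above.

#include <cstdint>
#include <cstddef>
#include <utility>
#include <vector>

// Sequential LSD radix sort on unsigned keys, 8 bits per round,
// ping-ponging between two buffers; result ends up back in a.
void radix_sort_seq(std::vector<std::uint32_t>& a, int num_bits) {
  const int radix_bits = 8, buckets = 1 << radix_bits;
  std::vector<std::uint32_t> tmp(a.size());
  for (int bot = 0; bot < num_bits; bot += radix_bits) {
    std::vector<std::size_t> counts(buckets + 1, 0);
    auto key = [&](std::uint32_t x) { return (x >> bot) & (buckets - 1); };
    for (std::uint32_t x : a) counts[key(x) + 1]++;                // count
    for (int b = 0; b < buckets; b++) counts[b + 1] += counts[b];  // scan
    for (std::uint32_t x : a) tmp[counts[key(x)]++] = x;           // stable place
    std::swap(a, tmp);                                             // ping-pong
  }
}
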
template void integer_sort(Range& in, long bits) { - auto tmp = parlay::sequence::uninitialized(in.size()); + using V = typename Range::value_type; + auto tmp = parlay::sequence::uninitialized(in.size()); auto in_slice = parlay::make_slice(in); auto tmp_slice = parlay::make_slice(tmp); radix_sort(in_slice, tmp_slice, bits, true); diff --git a/examples/lasso_regression.h b/examples/lasso_regression.h index eb3a709..7b1aec7 100644 --- a/examples/lasso_regression.h +++ b/examples/lasso_regression.h @@ -131,7 +131,7 @@ void solve_lasso(const sparse_matrix& AT, const vector& y, // Convergence check if (step > 0) { if (max_change <= (step + 1) * delta_threshold || counter > 100) { - step--; counter = 0; + step--; counter = 0; } } else { // Only use actual objective on last step. real obj = objective(Ax, x, y, lambda); diff --git a/examples/ldd_connectivity.h b/examples/ldd_connectivity.h index 4b6c8f5..fddf637 100644 --- a/examples/ldd_connectivity.h +++ b/examples/ldd_connectivity.h @@ -30,12 +30,12 @@ ldd_connectivity(const graph& G, float beta = .5) { // Extract the remaining edges auto E_r = parlay::flatten(parlay::tabulate(n, [&] (vertex u) { - auto remain = filter(G[u], [&] (vertex v) {return P[u] != P[v];}); - return map(remain, [&] (vertex v) {return edge(P[u],P[v]);}, 1000);})); + auto remain = filter(G[u], [&] (vertex v) {return P[u] != P[v];}); + return map(remain, [&] (vertex v) {return edge(P[u],P[v]);}, 1000);})); // Extract the remaining vertices parlay::sequence V_r = parlay::pack_index(parlay::tabulate(n, [&] (vertex v) { - return P[v] == v;})); + return P[v] == v;})); // Finish off with star contraction on remaining graph auto roots = star_contract(E_r, V_r, P, parlay::random_generator(0)); diff --git a/examples/le_list.h b/examples/le_list.h index 09afd62..bd4bf26 100644 --- a/examples/le_list.h +++ b/examples/le_list.h @@ -51,7 +51,7 @@ class le_list{ // create n empty le-lists, each of size capacity A = tabulate(n, [=] (vertex i) { return tabulate(capacity, [] (int j){ - return std::make_pair((vertex) 0, max_distance);});}); + return std::make_pair((vertex) 0, max_distance);});}); // initial sizes are 0 sizes = tabulate>(n, [] (vertex i) {return 0;}); } @@ -68,7 +68,7 @@ class le_list{ // lists is unspecified. 
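
[Editor's note] The le_list container patched here is, at its core, a set of fixed-capacity per-vertex logs with atomic size counters, so concurrent inserts into the same list do not race. A rough standalone sketch of that pattern is below; the names and std types are hypothetical and this is not the class's actual implementation, which uses parlay sequences and assumes the capacity bound is respected by the caller. The pack method of the real class continues below.

#include <atomic>
#include <cstddef>
#include <utility>
#include <vector>

// Append-only per-slot log with a fixed capacity and an atomic cursor.
struct append_log {
  std::vector<std::vector<std::pair<int,int>>> entries;  // (vertex, distance)
  std::vector<std::atomic<int>> sizes;
  append_log(std::size_t n, std::size_t capacity)
    : entries(n, std::vector<std::pair<int,int>>(capacity)), sizes(n) {
    for (auto& s : sizes) s = 0;
  }
  // claim a slot atomically, then write into it without contention;
  // assumes the capacity is never exceeded
  void insert(std::size_t i, int vertex, int dist) {
    int pos = sizes[i].fetch_add(1);
    entries[i][pos] = {vertex, dist};
  }
};
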
sequence>> pack() { return tabulate(sizes.size(), [&] (int i){ - return to_sequence(A[i].head(sizes[i]));}); + return to_sequence(A[i].head(sizes[i]));}); } }; @@ -81,11 +81,11 @@ struct vertex_info{ template auto truncated_bfs(graph& G, graph& GT, - sequence& srcs, - sequence& order, - sequence& delta_ro, - sequence>& delta, - le_list& L){ + sequence& srcs, + sequence& order, + sequence& delta_ro, + sequence>& delta, + le_list& L){ vertex n = G.size(); auto vtxs = sequence(n); distance dist = 0; @@ -102,21 +102,21 @@ auto truncated_bfs(graph& G, graph& GT, // function to apply on each edge s->d auto edge_f = [&] (vertex s, vertex d) -> bool { if ((vtxs[d].root_ro == Empty - || order[vtxs[s].root_ro] < order[vtxs[d].root_ro]) - && (dist < delta_ro[d])) { + || order[vtxs[s].root_ro] < order[vtxs[d].root_ro]) + && (dist < delta_ro[d])) { // if d is unvisited or root priority of d is larger than s, // and distance to d is greater than current distance, // then try to update the root of d auto less = [&] (vertex newv, vertex old) { - return (old == Empty || (order[newv] < order[old]));}; + return (old == Empty || (order[newv] < order[old]));}; if (write_min(&vtxs[d].root, vtxs[s].root_ro, less)) { - L.insert(d, vtxs[s].root_ro, dist); - // update nearest distance to d - write_min(&delta[d], dist, std::less()); - // only true if first to update d (avoids duplicates in frontier) - distance old = vtxs[d].step; - return (dist > old) && vtxs[d].step.compare_exchange_strong(old, dist); + L.insert(d, vtxs[s].root_ro, dist); + // update nearest distance to d + write_min(&delta[d], dist, std::less()); + // only true if first to update d (avoids duplicates in frontier) + distance old = vtxs[d].step; + return (dist > old) && vtxs[d].step.compare_exchange_strong(old, dist); } return false; } @@ -128,7 +128,7 @@ auto truncated_bfs(graph& G, graph& GT, // in this chunk by the highest priority in the chunk. auto cond_f = [&] (vertex d) { return (dist < delta_ro[d] && - (vtxs[d].root == Empty || order[vtxs[d].root] != start)); + (vtxs[d].root == Empty || order[vtxs[d].root] != start)); }; auto frontier_map = ligra::edge_map(G, GT, edge_f, cond_f); @@ -156,7 +156,7 @@ auto create_le_list(Graph& G, Graph& GT, sequence& order) { // initial distances of "infinity" auto delta = tabulate>(n, [] (vertex i) { - return max_distance;}); + return max_distance;}); // BFS using prefix doubling, i.e. 
increasing prefixes of doubling size for(vertex r = 0; r < n; r = 2*r + 1){ @@ -171,11 +171,11 @@ auto create_le_list(Graph& G, Graph& GT, sequence& order) { parallel_for(0, n, [&] (vertex i){ //Sort lists in order of increasing vertex number (based on permutation) sort_inplace(lists[i], [&] (auto p1, auto p2) { - return inv_order[p1.first] < inv_order[p2.first];}); + return inv_order[p1.first] < inv_order[p2.first];}); //Prune lists for "extra elements" auto flags = tabulate(lists[i].size(), [&lists, i] (int j){ - return j==0 || lists[i][j].second < lists[i][j-1].second;}); + return j==0 || lists[i][j].second < lists[i][j-1].second;}); lists[i] = pack(lists[i],flags); }); return lists; diff --git a/examples/lubys.cpp b/examples/lubys.cpp index 19ed4ad..d1fa62e 100644 --- a/examples/lubys.cpp +++ b/examples/lubys.cpp @@ -34,19 +34,22 @@ int main(int argc, char* argv[]) { parlay::internal::timer t("Time"); parlay::sequence in_set; for (int i=0; i < 5; i++) { - in_set = MIS(G); + in_set = luby_MIS(G); t.next("lubys"); } - + // check whether an mis bool bad_mis = parlay::any_of(parlay::iota(n), [&] (vertex u) { - return ((in_set[u] && parlay::any_of(G[u], [&] (vertex v) {return in_set[v];})) || - (!in_set[u] && parlay::none_of(G[u], [&] (vertex v) {return in_set[v];})));}); + return ((in_set[u] && parlay::any_of(G[u], [&] (vertex v) { + return in_set[v];}))|| + (!in_set[u] && parlay::none_of(G[u], [&] (vertex v) { + return in_set[v];})));}); if (bad_mis > 0) std::cout << "not a maximal independent set" << std::endl; - - int num_in_set = parlay::reduce(parlay::map(in_set,[] (bool a) {return a ? 1 : 0;})); + + int num_in_set = parlay::reduce(parlay::map(in_set,[] (bool a) { + return a ? 1 : 0;})); std::cout << "number in set: " << num_in_set << std::endl; } } diff --git a/examples/lubys.h b/examples/lubys.h index cd60239..5a6c0d9 100644 --- a/examples/lubys.h +++ b/examples/lubys.h @@ -3,9 +3,7 @@ #include #include -#include #include -#include // ************************************************************** // Luby's algorithm for Maximal Independent Set (MIS). 
From: @@ -18,7 +16,7 @@ // Note graph is copied since it is mutated internally template -parlay::sequence MIS(const graph& G_in) { +parlay::sequence luby_MIS(const graph& G_in) { using vertex = typename graph::value_type::value_type; long n = G_in.size(); parlay::random_generator gen(0); @@ -28,10 +26,11 @@ parlay::sequence MIS(const graph& G_in) { const graph* G = &G_in; graph G_nxt(n); - // Each vertex is either in the set, out of the set, or unknown (initially) + // Each vertex is either in the set, out of the set, or unknown + // (initially) enum state : char {InSet, OutSet, Unknown}; auto states = parlay::tabulate>(n, [&] (long i) { - return Unknown;}); + return Unknown;}); // initial active vetices (all of them) auto V = parlay::tabulate(n, [] (vertex i) {return i;}); @@ -43,18 +42,16 @@ parlay::sequence MIS(const graph& G_in) { while (V.size() > 0) { // pick random priorities for the remaining vertices - for_each(V, [&] (vertex u) {auto r = gen[u]; priority[u] = dis(r);}); + for_each(V, [&] (vertex u) { + auto r = gen[u]; priority[u] = dis(r);}); // for each remaining vertex, if it has a local max priority then // add it to the MIS and remove neighbors for_each(V, [&] (vertex u) { - // if (priority[u] > - // reduce(parlay::delayed::map((*G)[u], [&] (vertex v) {return priority[v];}), - // parlay::maxm())) { if (parlay::find_if((*G)[u], [&] (vertex v) { - return priority[v] >= priority[u];}) == (*G)[u].end()) { - states[u] = InSet; - for_each((*G)[u], [&] (vertex v) { + return priority[v] >= priority[u];}) == (*G)[u].end()) { + states[u] = InSet; + for_each((*G)[u], [&] (vertex v) { if (states[v] == Unknown) states[v] = OutSet;}); }}); @@ -64,12 +61,13 @@ parlay::sequence MIS(const graph& G_in) { // only keep edges for which both endpoints are unknown for_each(V, [&] (vertex u) { G_nxt[u] = parlay::filter((*G)[u], [&] (vertex v) { - return states[v] == Unknown;});}); + return states[v] == Unknown;});}); // advanced state of random number generator gen = gen[n]; - G = &G_nxt; + G = &G_nxt; // only needed on first iteration } + // convert to booleans indicating if in the MIS return parlay::map(states, [] (auto& s) {return s.load() == InSet;}); } diff --git a/examples/minimum_edit_distance.h b/examples/minimum_edit_distance.h index cbf41af..1de21dc 100644 --- a/examples/minimum_edit_distance.h +++ b/examples/minimum_edit_distance.h @@ -35,7 +35,7 @@ long minimum_edit_distance(const Seq& s1, const Seq& s2) { int* col_in, int* col_out, auto s2, int m, int diag) { parlay::sequence row(n); - for (int i=0; i < n; i++) row[i] = row_in[i]; + for (int i=0; i < n; i++) row[i] = row_in[i]; for (int j=0; j < m; j++) { int prev = col_in[j]; for (int i=0; i < n; i++) { diff --git a/examples/radix_tree.h b/examples/radix_tree.h index e24fdef..a3dc658 100644 --- a/examples/radix_tree.h +++ b/examples/radix_tree.h @@ -64,7 +64,7 @@ struct radix_tree { // the same LCP value. The following identifies the roots of such // subtrees, and labels each with an index. 
auto root_flags = parlay::tabulate(n - 1, [&] (index i) { - return (i == Parents[i] || lcps[i] != lcps[Parents[i]]);}); + return (i == Parents[i] || lcps[i] != lcps[Parents[i]]);}); auto root_locs = parlay::pack_index(root_flags); index num_roots = root_locs.size(); parlay::sequence root_ids(n-1); @@ -73,36 +73,36 @@ struct radix_tree { // find root of subtree for location i auto cluster_root = [&] (index i) { while (i != Parents[i] && lcps[i] == lcps[Parents[i]]) - i = Parents[i]; + i = Parents[i]; return i;}; // overall root root = root_ids[*parlay::find_if(root_ids, [&] (long i) { - return Parents[i] == i;})]; + return Parents[i] == i;})]; // For each root of a subtree return a pair of the index of its // parent subtree root node, and its index*2 + 1. auto internal = parlay::delayed_tabulate(num_roots, [&] (index i) { - if (i == root) return std::pair{num_roots,2*i+1}; - index j = root_locs[i]; - index parent = root_ids[cluster_root(Parents[j])]; - return std::pair{parent, 2 * i + 1};}); + if (i == root) return std::pair{num_roots,2*i+1}; + index j = root_locs[i]; + index parent = root_ids[cluster_root(Parents[j])]; + return std::pair{parent, 2 * i + 1};}); // For each leaf (one per input string) return a pair // of the index of its parent subtree root node, and 2* its index. auto leaves = parlay::delayed_tabulate(n, [&] (index i) { - // for each element of SA find larger LCP on either side - // the root of its subtree will be its parent in the radix tree - index parent = (i == 0) ? 0 : ((i==n-1) ? i-1 : (lcps[i-1] > lcps[i] ? i-1 : i)); - parent = root_ids[cluster_root(parent)]; - return std::pair{parent, 2*i}; }); + // for each element of SA find larger LCP on either side + // the root of its subtree will be its parent in the radix tree + index parent = (i == 0) ? 0 : ((i==n-1) ? i-1 : (lcps[i-1] > lcps[i] ? i-1 : i)); + parent = root_ids[cluster_root(parent)]; + return std::pair{parent, 2*i}; }); // generate a mapping from node indices to indices of their children auto groups = parlay::group_by_index(parlay::append(internal, leaves), num_roots+1); // Create nodes. 
tree = parlay::tabulate(num_roots, [&] (index i) { - index j = root_locs[i]; - return node{lcps[j], j, std::move(groups[i])};}); + index j = root_locs[i]; + return node{lcps[j], j, std::move(groups[i])};}); } radix_tree() {} diff --git a/examples/range_min.h b/examples/range_min.h index 9c74034..be351c9 100644 --- a/examples/range_min.h +++ b/examples/range_min.h @@ -18,7 +18,7 @@ // Query takes O(block_size) time // ************************************************************** template , - typename Uint=unsigned int> + typename Uint=unsigned int> class range_min { public: range_min(Seq &a, Less&& less = {}, long block_size=32) diff --git a/examples/ray_trace.h b/examples/ray_trace.h index a2d7839..989e1b0 100644 --- a/examples/ray_trace.h +++ b/examples/ray_trace.h @@ -32,8 +32,8 @@ struct vect3d { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2];} vect3d cross(vect3d b) const { return vect3d(a[1]*b[2] - a[2]*b[1], - a[2]*b[0] - a[0]*b[2], - a[0]*b[1] - a[1]*b[0]); } + a[2]*b[0] - a[0]*b[2], + a[0]*b[1] - a[1]*b[0]); } coord& operator[](int i) {return a[i];}; vect3d() : a({0.0,0.0,0.0}) {} vect3d(coord x, coord y, coord z) : a({x,y,z}) {} @@ -54,8 +54,8 @@ range get_range(coord c0, coord c1, coord c2) { inline bool in_box(point3d p, Bounding_Box B) { return (p[0] >= (B[0][0] - epsilon) && p[0] <= (B[0][1] + epsilon) && - p[1] >= (B[1][0] - epsilon) && p[1] <= (B[1][1] + epsilon) && - p[2] >= (B[2][0] - epsilon) && p[2] <= (B[2][1] + epsilon)); + p[1] >= (B[1][0] - epsilon) && p[1] <= (B[1][1] + epsilon) && + p[2] >= (B[2][0] - epsilon) && p[2] <= (B[2][1] + epsilon)); } // Code for ray_triangle_intersect is based on: @@ -98,9 +98,9 @@ coord ray_triangle_intersect(ray r, triangle tri) { // The triangles are given by n indices I into the triangle array Tri. 
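
[Editor's note] The find_ray comment continues below. For background, ray_triangle_intersect in this file cites an external source; the standard Moller-Trumbore formulation of the ray/triangle test looks like the generic self-contained sketch below (its own small vector type, not the file's vect3d/coord types, and not necessarily identical in detail to the file's version).

#include <array>
#include <cmath>
#include <optional>

using vec3 = std::array<double,3>;
vec3 sub(vec3 a, vec3 b) { return {a[0]-b[0], a[1]-b[1], a[2]-b[2]}; }
vec3 cross(vec3 a, vec3 b) {
  return {a[1]*b[2]-a[2]*b[1], a[2]*b[0]-a[0]*b[2], a[0]*b[1]-a[1]*b[0]}; }
double dot(vec3 a, vec3 b) { return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]; }

// Distance t along the ray (origin o, direction d) to triangle (p0,p1,p2),
// or std::nullopt if there is no hit.
std::optional<double> ray_tri(vec3 o, vec3 d, vec3 p0, vec3 p1, vec3 p2) {
  const double eps = 1e-9;
  vec3 e1 = sub(p1, p0), e2 = sub(p2, p0);
  vec3 pv = cross(d, e2);
  double det = dot(e1, pv);
  if (std::fabs(det) < eps) return std::nullopt;  // ray parallel to triangle
  double inv = 1.0 / det;
  vec3 tv = sub(o, p0);
  double u = dot(tv, pv) * inv;                   // first barycentric coord
  if (u < 0 || u > 1) return std::nullopt;
  vec3 qv = cross(tv, e1);
  double v = dot(d, qv) * inv;                    // second barycentric coord
  if (v < 0 || u + v > 1) return std::nullopt;
  double t = dot(e2, qv) * inv;                   // distance along the ray
  if (t > eps) return t;
  return std::nullopt;
}
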
// -1 is returned if there is no intersection index_t find_ray(ray r, - parlay::sequence const &I, - Triangles const &Tri, - Bounding_Box B) { + parlay::sequence const &I, + Triangles const &Tri, + Bounding_Box B) { auto [o, d] = r; index_t n = I.size(); coord tMin = std::numeric_limits::max(); @@ -166,7 +166,7 @@ parlay::sequence ray_cast(const Triangles &triangles, const Rays &rays) auto boxes = parlay::tabulate(triangles.size(), [&] (size_t i) { auto [p0,p1,p2] = triangles[i]; return Bounding_Box{get_range(p0[0],p1[0],p2[0]),get_range(p0[1],p1[1],p2[1]), - get_range(p0[2],p1[2],p2[2])};}); + get_range(p0[2],p1[2],p2[2])};}); tree_node* R = kdtree_from_boxes(boxes); diff --git a/examples/rectangle_intersection.h b/examples/rectangle_intersection.h index e7658e0..688721b 100644 --- a/examples/rectangle_intersection.h +++ b/examples/rectangle_intersection.h @@ -30,21 +30,21 @@ void process_recursive(tree_node* T, const Boxes &rectangles, pair_seq* results) if (T->is_leaf()) { // get all intersections within the leaf *results = parlay::flatten(parlay::tabulate(T->n, [&] (index_t i) { - index_t idx_a = T->box_indices[i]; - auto x = parlay::tabulate(T->n - i - 1, [&] (long j) { - index_t idx_b = T->box_indices[i+j+1]; - if (!intersect(rectangles[idx_a], rectangles[idx_b])) - return std::pair{-1,-1}; - else return std::pair{std::min(idx_a, idx_b), std::max(idx_a, idx_b)};}, 1000); - return filter(x, [] (auto p) {return p.first > -1;});}, 100)); + index_t idx_a = T->box_indices[i]; + auto x = parlay::tabulate(T->n - i - 1, [&] (long j) { + index_t idx_b = T->box_indices[i+j+1]; + if (!intersect(rectangles[idx_a], rectangles[idx_b])) + return std::pair{-1,-1}; + else return std::pair{std::min(idx_a, idx_b), std::max(idx_a, idx_b)};}, 1000); + return filter(x, [] (auto p) {return p.first > -1;});}, 100)); } else { // recurse to two subtrees parlay::par_do([&] {process_recursive(T->left, rectangles, results);}, - [&] {process_recursive(T->right, rectangles, - results + T->left->num_leaves);}); + [&] {process_recursive(T->right, rectangles, + results + T->left->num_leaves);}); } } - + auto rectangle_intersection(Boxes &rectangles) { tree_node* R = kdtree_from_boxes(rectangles); parlay::sequence results(R->num_leaves); diff --git a/examples/star_connectivity.h b/examples/star_connectivity.h index e1a2014..f6e8b6b 100644 --- a/examples/star_connectivity.h +++ b/examples/star_connectivity.h @@ -26,9 +26,9 @@ std::uniform_real_distribution rdis(0.0,1.0); template parlay::sequence star_contract(const parlay::sequence>& E, - parlay::sequence V, - parlay::sequence& parents, - rg generator) { + parlay::sequence V, + parlay::sequence& parents, + rg generator) { using edge = std::pair; // std::cout << "size : " << E.size() << ", " << V.size() << std::endl; @@ -67,7 +67,7 @@ star_contract(const parlay::sequence>& E, template std::pair, parlay::sequence> star_connectivity_simple(const parlay::sequence>& E, - long n) { + long n) { using edge = std::pair; auto parents = parlay::tabulate(n, [] (vertex i) {return i;}); auto roots = star_contract(E, parents, parents, rg(0)); @@ -82,9 +82,9 @@ star_connectivity_simple(const parlay::sequence>& E, template parlay::sequence star_contract_sample(const parlay::sequence>& E, - parlay::sequence V, - parlay::sequence& parents, - rg generator) { + parlay::sequence V, + parlay::sequence& parents, + rg generator) { using edge = std::pair; // std::cout << "size : " << E.size() << ", " << V.size() << std::endl; @@ -97,8 +97,8 @@ star_contract_sample(const parlay::sequence>& E, 
float frac = V.size() * 3.0 / E.size(); rg r = generator[0]; auto Ee = parlay::map_maybe(parlay::iota(E.size()), [&] (long i) { - rg r = generator[i]; - return rdis(r) < frac ? std::optional(E[i]) : std::optional();}); + rg r = generator[i]; + return rdis(r) < frac ? std::optional(E[i]) : std::optional();}); return star_contract_sample(Ee, V, parents, generator[E.size()]); } @@ -131,7 +131,7 @@ star_contract_sample(const parlay::sequence>& E, template std::pair, parlay::sequence> star_connectivity(const parlay::sequence>& E, - long n) { + long n) { using edge = std::pair; auto parents = parlay::tabulate(n, [] (vertex i) {return i;}); diff --git a/examples/suffix_tree.h b/examples/suffix_tree.h index ed51bb7..2c83da5 100644 --- a/examples/suffix_tree.h +++ b/examples/suffix_tree.h @@ -24,8 +24,8 @@ radix_tree suffix_tree(const Str& S) { parlay::for_each(result.tree, [&] (auto& node) { node.string_idx = SA[node.string_idx]; node.children = parlay::map(node.children, [&] (index i) { - // leaf if (i%2 == 0), otherwise internal - return (i % 2 == 0) ? 2 * SA[i/2] : i;}, 1000);}); + // leaf if (i%2 == 0), otherwise internal + return (i % 2 == 0) ? 2 * SA[i/2] : i;}, 1000);}); return result; } @@ -39,8 +39,8 @@ long find(const radix_tree& T, const Str& str, const sStr& search_str) { // check if any children match on first character for (auto child : T.get_children(current)) if (str[T.get_string(child) + depth] == search_str[depth]) { - new_node = child; - break; + new_node = child; + break; } if (new_node == current) return -1l; // none found index new_depth = T.is_leaf(new_node) ? str.size() - T.get_string(new_node) : T.get_depth(new_node);
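
[Editor's note] Several files touched in this patch (star_connectivity.h, ldd_connectivity.h, boruvka.h) compute connected components by contraction and return a parent labeling. As a point of reference for what that labeling means, here is a minimal sequential union-find baseline that produces an equivalent labeling (a sketch independent of the parlay code; names are illustrative). It is handy for checking the parallel results on small inputs.

#include <numeric>
#include <utility>
#include <vector>

// Sequential connectivity via union-find with path halving.
// Vertices in the same component end up with the same representative.
std::vector<int> connectivity_seq(int n,
                                  const std::vector<std::pair<int,int>>& edges) {
  std::vector<int> parent(n);
  std::iota(parent.begin(), parent.end(), 0);
  auto find = [&](int x) {
    while (parent[x] != x) { parent[x] = parent[parent[x]]; x = parent[x]; }
    return x;
  };
  for (auto [u, v] : edges) {
    int ru = find(u), rv = find(v);
    if (ru != rv) parent[ru] = rv;                  // union the two components
  }
  for (int v = 0; v < n; v++) parent[v] = find(v);  // flatten the labels
  return parent;
}
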