diff --git a/CMakeLists.txt b/CMakeLists.txt
index af20ee10..07f82b48 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 
 if(NOT CMAKE_CXX_STANDARD EQUAL 17)
     add_compile_definitions(PISA_ENABLE_CONCEPTS=1)
+    add_compile_definitions(PISA_CXX20=1)
 endif()
 add_compile_definitions(BOOST_NO_CXX98_FUNCTION_BASE=1)
 
diff --git a/include/pisa/block_posting_list.hpp b/include/pisa/block_posting_list.hpp
index 2b05e2ca..25e8462a 100644
--- a/include/pisa/block_posting_list.hpp
+++ b/include/pisa/block_posting_list.hpp
@@ -99,7 +99,7 @@ struct block_posting_list {
 
         void PISA_ALWAYSINLINE next() {
             ++m_pos_in_block;
-            if (PISA_UNLIKELY(m_pos_in_block == m_cur_block_size)) {
+            if PISA_UNLIKELY (m_pos_in_block == m_cur_block_size) {
                 if (m_cur_block + 1 == m_blocks) {
                     m_cur_docid = m_universe;
                     return;
@@ -118,7 +118,7 @@ struct block_posting_list {
          * to the current document ID, the position will not change.
          */
         void PISA_ALWAYSINLINE next_geq(uint64_t lower_bound) {
-            if (PISA_UNLIKELY(lower_bound > m_cur_block_max)) {
+            if PISA_UNLIKELY (lower_bound > m_cur_block_max) {
                 // binary search seems to perform worse here
                 if (lower_bound > block_max(m_blocks - 1)) {
                     m_cur_docid = m_universe;
@@ -142,7 +142,7 @@ struct block_posting_list {
         void PISA_ALWAYSINLINE move(uint64_t pos) {
             assert(pos >= position());
             uint64_t block = pos / BlockCodec::block_size;
-            if (PISA_UNLIKELY(block != m_cur_block)) {
+            if PISA_UNLIKELY (block != m_cur_block) {
                 decode_docs_block(block);
             }
             while (position() < pos) {
diff --git a/include/pisa/codec/block_codecs.hpp b/include/pisa/codec/block_codecs.hpp
index 04a8a5b4..47b1ebea 100644
--- a/include/pisa/codec/block_codecs.hpp
+++ b/include/pisa/codec/block_codecs.hpp
@@ -230,7 +230,7 @@ struct optpfor_block {
         thread_local codec_type optpfor_codec;  // pfor decoding is *not* thread-safe
         assert(n <= block_size);
 
-        if (PISA_UNLIKELY(n < block_size)) {
+        if PISA_UNLIKELY (n < block_size) {
             return interpolative_block::decode(in, out, sum_of_values, n);
         }
 
@@ -306,7 +306,7 @@ struct varint_G8IU_block {
         static codec_type varint_codec;  // decodeBlock is thread-safe
         assert(n <= block_size);
 
-        if (PISA_UNLIKELY(n < block_size)) {
+        if PISA_UNLIKELY (n < block_size) {
             return interpolative_block::decode(in, out, sum_of_values, n);
         }
 
diff --git a/include/pisa/codec/compact_elias_fano.hpp b/include/pisa/codec/compact_elias_fano.hpp
index 6bd99ce8..43e3e180 100644
--- a/include/pisa/codec/compact_elias_fano.hpp
+++ b/include/pisa/codec/compact_elias_fano.hpp
@@ -164,9 +164,9 @@ struct compact_elias_fano {
 
             uint64_t skip = position - m_position;
             // optimize small forward skips
-            if (PISA_LIKELY(position > m_position && skip <= linear_scan_threshold)) {
+            if PISA_LIKELY (position > m_position && skip <= linear_scan_threshold) {
                 m_position = position;
-                if (PISA_UNLIKELY(m_position == size())) {
+                if PISA_UNLIKELY (m_position == size()) {
                     m_value = m_of.universe;
                 } else {
                     bit_vector::unary_enumerator he = m_high_enumerator;
@@ -193,13 +193,13 @@ struct compact_elias_fano {
             uint64_t cur_high = m_value >> m_of.lower_bits;
             uint64_t high_diff = high_lower_bound - cur_high;
 
-            if (PISA_LIKELY(lower_bound > m_value && high_diff <= linear_scan_threshold)) {
+            if PISA_LIKELY (lower_bound > m_value && high_diff <= linear_scan_threshold) {
                 // optimize small skips
                 next_reader next_value(*this, m_position + 1);
                 uint64_t val;
                 do {
                     m_position += 1;
-                    if (PISA_LIKELY(m_position < size())) {
+                    if PISA_LIKELY (m_position < size()) {
                         val = next_value();
                     } else {
                         m_position = size();
@@ -220,7 +220,7 @@ struct compact_elias_fano {
             m_position += 1;
             assert(m_position <= size());
 
-            if (PISA_LIKELY(m_position < size())) {
+            if PISA_LIKELY (m_position < size()) {
                 m_value = read_next();
             } else {
                 m_value = m_of.universe;
@@ -234,7 +234,7 @@ struct compact_elias_fano {
             }
 
             uint64_t prev_high = 0;
-            if (PISA_LIKELY(m_position < size())) {
+            if PISA_LIKELY (m_position < size()) {
                 prev_high = m_bv->predecessor1(m_high_enumerator.position() - 1);
             } else {
                 prev_high = m_bv->predecessor1(m_of.lower_bits_offset - 1);
@@ -253,7 +253,7 @@ struct compact_elias_fano {
 
       private:
         value_type PISA_NOINLINE slow_move(uint64_t position) {
-            if (PISA_UNLIKELY(position == size())) {
+            if PISA_UNLIKELY (position == size()) {
                 m_position = position;
                 m_value = m_of.universe;
                 return value();
@@ -279,7 +279,7 @@ struct compact_elias_fano {
         }
 
         value_type PISA_NOINLINE slow_next_geq(uint64_t lower_bound) {
-            if (PISA_UNLIKELY(lower_bound >= m_of.universe)) {
+            if PISA_UNLIKELY (lower_bound >= m_of.universe) {
                 return move(size());
             }
 
@@ -309,7 +309,7 @@ struct compact_elias_fano {
 
             next_reader read_value(*this, m_position);
             while (true) {
-                if (PISA_UNLIKELY(m_position == size())) {
+                if PISA_UNLIKELY (m_position == size()) {
                     m_value = m_of.universe;
                     return value();
                 }
diff --git a/include/pisa/codec/compact_ranked_bitvector.hpp b/include/pisa/codec/compact_ranked_bitvector.hpp
index c06c8f9b..7900d67a 100644
--- a/include/pisa/codec/compact_ranked_bitvector.hpp
+++ b/include/pisa/codec/compact_ranked_bitvector.hpp
@@ -139,9 +139,9 @@ struct compact_ranked_bitvector {
 
             // optimize small forward skips
             uint64_t skip = position - m_position;
-            if (PISA_LIKELY(position > m_position && skip <= linear_scan_threshold)) {
+            if PISA_LIKELY (position > m_position && skip <= linear_scan_threshold) {
                 m_position = position;
-                if (PISA_UNLIKELY(m_position == size())) {
+                if PISA_UNLIKELY (m_position == size()) {
                     m_value = m_of.universe;
                 } else {
                     bit_vector::unary_enumerator he = m_enumerator;
@@ -164,13 +164,13 @@ struct compact_ranked_bitvector {
             }
 
             uint64_t diff = lower_bound - m_value;
-            if (PISA_LIKELY(lower_bound > m_value && diff <= linear_scan_threshold)) {
+            if PISA_LIKELY (lower_bound > m_value && diff <= linear_scan_threshold) {
                 // optimize small skips
                 bit_vector::unary_enumerator he = m_enumerator;
                 uint64_t val;
                 do {
                     m_position += 1;
-                    if (PISA_LIKELY(m_position < size())) {
+                    if PISA_LIKELY (m_position < size()) {
                         val = he.next() - m_of.bits_offset;
                     } else {
                         m_position = size();
@@ -190,7 +190,7 @@ struct compact_ranked_bitvector {
             m_position += 1;
             assert(m_position <= size());
 
-            if (PISA_LIKELY(m_position < size())) {
+            if PISA_LIKELY (m_position < size()) {
                 m_value = read_next();
             } else {
                 m_value = m_of.universe;
@@ -206,7 +206,7 @@ struct compact_ranked_bitvector {
             }
 
             uint64_t pos = 0;
-            if (PISA_LIKELY(m_position < size())) {
+            if PISA_LIKELY (m_position < size()) {
                 pos = m_bv->predecessor1(m_enumerator.position() - 1);
             } else {
                 pos = m_bv->predecessor1(m_of.end - 1);
@@ -218,7 +218,7 @@ struct compact_ranked_bitvector {
       private:
         value_type PISA_NOINLINE slow_move(uint64_t position) {
             uint64_t skip = position - m_position;
-            if (PISA_UNLIKELY(position == size())) {
+            if PISA_UNLIKELY (position == size()) {
                 m_position = position;
                 m_value = m_of.universe;
                 return value();
@@ -245,7 +245,7 @@ struct compact_ranked_bitvector {
         value_type PISA_NOINLINE slow_next_geq(uint64_t lower_bound) {
             using broadword::popcount;
 
-            if (PISA_UNLIKELY(lower_bound >= m_of.universe)) {
+            if PISA_UNLIKELY (lower_bound >= m_of.universe) {
                 return move(size());
             }
 
diff --git a/include/pisa/codec/maskedvbyte.hpp b/include/pisa/codec/maskedvbyte.hpp
index 05e9e587..b31d2228 100644
--- a/include/pisa/codec/maskedvbyte.hpp
+++ b/include/pisa/codec/maskedvbyte.hpp
@@ -24,7 +24,7 @@ struct maskedvbyte_block {
     }
     static uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) {
         assert(n <= block_size);
-        if (PISA_UNLIKELY(n < block_size)) {
+        if PISA_UNLIKELY (n < block_size) {
             return interpolative_block::decode(in, out, sum_of_values, n);
         }
         auto read = masked_vbyte_decode(in, out, n);
diff --git a/include/pisa/codec/qmx.hpp b/include/pisa/codec/qmx.hpp
index 16781be1..5b6d800f 100644
--- a/include/pisa/codec/qmx.hpp
+++ b/include/pisa/codec/qmx.hpp
@@ -33,7 +33,7 @@ struct qmx_block {
     static uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) {
         static QMX::compress_integer_qmx_improved qmx_codec;  // decodeBlock is thread-safe
         assert(n <= block_size);
-        if (PISA_UNLIKELY(n < block_size)) {
+        if PISA_UNLIKELY (n < block_size) {
             return interpolative_block::decode(in, out, sum_of_values, n);
         }
         uint32_t enc_len = 0;
diff --git a/include/pisa/codec/simdbp.hpp b/include/pisa/codec/simdbp.hpp
index a3955cbd..200aadba 100644
--- a/include/pisa/codec/simdbp.hpp
+++ b/include/pisa/codec/simdbp.hpp
@@ -28,7 +28,7 @@ struct simdbp_block {
     }
     static uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) {
         assert(n <= block_size);
-        if (PISA_UNLIKELY(n < block_size)) {
+        if PISA_UNLIKELY (n < block_size) {
             return interpolative_block::decode(in, out, sum_of_values, n);
         }
         uint32_t b = *in++;
diff --git a/include/pisa/codec/varintgb.hpp b/include/pisa/codec/varintgb.hpp
index 6ebaee23..84b55a80 100644
--- a/include/pisa/codec/varintgb.hpp
+++ b/include/pisa/codec/varintgb.hpp
@@ -250,7 +250,7 @@ struct varintgb_block {
     static uint8_t const* decode(uint8_t const* in, uint32_t* out, uint32_t sum_of_values, size_t n) {
         thread_local VarIntGB<false> varintgb_codec;
         assert(n <= block_size);
-        if (PISA_UNLIKELY(n < block_size)) {
+        if PISA_UNLIKELY (n < block_size) {
             return interpolative_block::decode(in, out, sum_of_values, n);
         }
         auto read = varintgb_codec.decodeArray(in, n, out);
diff --git a/include/pisa/query/algorithm/maxscore_query.hpp b/include/pisa/query/algorithm/maxscore_query.hpp
index 5b127e82..c68edb6a 100644
--- a/include/pisa/query/algorithm/maxscore_query.hpp
+++ b/include/pisa/query/algorithm/maxscore_query.hpp
@@ -83,7 +83,7 @@ struct maxscore_query {
         while (current_docid < max_docid) {
             auto status = DocumentStatus::Skip;
             while (status == DocumentStatus::Skip) {
-                if (PISA_UNLIKELY(next_docid >= max_docid)) {
+                if PISA_UNLIKELY (next_docid >= max_docid) {
                     return;
                 }
 
diff --git a/include/pisa/recursive_graph_bisection.hpp b/include/pisa/recursive_graph_bisection.hpp
index 56db570e..85f27646 100644
--- a/include/pisa/recursive_graph_bisection.hpp
+++ b/include/pisa/recursive_graph_bisection.hpp
@@ -180,7 +180,7 @@ void compute_move_gains_caching(
         auto terms = range.terms(d);
         for (const auto& t: terms) {
             if constexpr (isLikelyCached) {  // NOLINT(readability-braces-around-statements)
-                if (PISA_UNLIKELY(not gain_cache.has_value(t))) {
+                if PISA_UNLIKELY (not gain_cache.has_value(t)) {
                     const auto& from_deg = from_lex[t];
                     const auto& to_deg = to_lex[t];
                     const auto term_gain = bp::expb(logn1, logn2, from_deg, to_deg)
@@ -188,7 +188,7 @@ void compute_move_gains_caching(
                     gain_cache.set(t, term_gain);
                 }
             } else {
-                if (PISA_LIKELY(not gain_cache.has_value(t))) {
+                if PISA_LIKELY (not gain_cache.has_value(t)) {
                     const auto& from_deg = from_lex[t];
                     const auto& to_deg = to_lex[t];
                     const auto term_gain = bp::expb(logn1, logn2, from_deg, to_deg)
@@ -223,7 +223,7 @@ void swap(document_partition<Iterator>& partition, degree_map_pair& degrees) {
     auto lit = left.begin();
     auto rit = right.begin();
     for (; lit != left.end() && rit != right.end(); ++lit, ++rit) {
-        if (PISA_UNLIKELY(left.gain(*lit) + right.gain(*rit) <= 0)) {
+        if PISA_UNLIKELY (left.gain(*lit) + right.gain(*rit) <= 0) {
             break;
         }
         {
diff --git a/include/pisa/sequence/partitioned_sequence.hpp b/include/pisa/sequence/partitioned_sequence.hpp
index 5d15ef24..e80c398f 100644
--- a/include/pisa/sequence/partitioned_sequence.hpp
+++ b/include/pisa/sequence/partitioned_sequence.hpp
@@ -186,7 +186,7 @@ struct partitioned_sequence {
         // note: this is instantiated oly if BaseSequence has next_geq
         template <typename Q = base_sequence_enumerator, typename = if_has_next_geq<Q>>
         value_type PISA_ALWAYSINLINE next_geq(uint64_t lower_bound) {
-            if (PISA_LIKELY(lower_bound >= m_cur_base && lower_bound <= m_cur_upper_bound)) {
+            if PISA_LIKELY (lower_bound >= m_cur_base && lower_bound <= m_cur_upper_bound) {
                 auto val = m_partition_enum.next_geq(lower_bound - m_cur_base);
                 m_position = m_cur_begin + val.first;
                 return value_type(m_position, m_cur_base + val.second);
@@ -197,7 +197,7 @@ struct partitioned_sequence {
         value_type PISA_ALWAYSINLINE next() {
             ++m_position;
 
-            if (PISA_LIKELY(m_position < m_cur_end)) {
+            if PISA_LIKELY (m_position < m_cur_end) {
                 uint64_t val = m_cur_base + m_partition_enum.next().second;
                 return value_type(m_position, val);
             }
@@ -207,7 +207,7 @@ struct partitioned_sequence {
         uint64_t size() const { return m_size; }
 
         uint64_t prev_value() const {
-            if (PISA_UNLIKELY(m_position == m_cur_begin)) {
+            if PISA_UNLIKELY (m_position == m_cur_begin) {
                 return m_cur_partition != 0U ? m_cur_base - 1 : 0;
             }
             return m_cur_base + m_partition_enum.prev_value();
@@ -224,7 +224,7 @@ struct partitioned_sequence {
         // tight loops, on microbenchmarks this causes an improvement of
         // about 3ns on my i7 3Ghz
         value_type PISA_NOINLINE slow_next() {
-            if (PISA_UNLIKELY(m_position == m_size)) {
+            if PISA_UNLIKELY (m_position == m_size) {
                 assert(m_cur_partition == m_partitions - 1);
                 auto val = m_partition_enum.next();
                 assert(val.first == m_partition_enum.size());
diff --git a/include/pisa/sequence/positive_sequence.hpp b/include/pisa/sequence/positive_sequence.hpp
index 838fe4f9..0e559bc9 100644
--- a/include/pisa/sequence/positive_sequence.hpp
+++ b/include/pisa/sequence/positive_sequence.hpp
@@ -48,7 +48,7 @@ struct positive_sequence {
             // the most common cases
             uint64_t prev = m_cur;
             if (position != m_position + 1) {
-                if (PISA_UNLIKELY(position == 0)) {
+                if PISA_UNLIKELY (position == 0) {
                     // we need to special-case position 0
                     m_cur = m_base_enum.move(0).second;
                     m_position = 0;
diff --git a/include/pisa/sequence/uniform_partitioned_sequence.hpp b/include/pisa/sequence/uniform_partitioned_sequence.hpp
index 94f8b471..0f4b941c 100644
--- a/include/pisa/sequence/uniform_partitioned_sequence.hpp
+++ b/include/pisa/sequence/uniform_partitioned_sequence.hpp
@@ -171,7 +171,7 @@ struct uniform_partitioned_sequence {
         // note: this is instantiated oly if BaseSequence has next_geq
         template <typename Q = base_sequence_enumerator, typename = if_has_next_geq<Q>>
         value_type PISA_ALWAYSINLINE next_geq(uint64_t lower_bound) {
-            if (PISA_LIKELY(lower_bound >= m_cur_base && lower_bound <= m_cur_upper_bound)) {
+            if PISA_LIKELY (lower_bound >= m_cur_base && lower_bound <= m_cur_upper_bound) {
                 auto val = m_partition_enum.next_geq(lower_bound - m_cur_base);
                 m_position = m_cur_begin + val.first;
                 return value_type(m_position, m_cur_base + val.second);
@@ -182,7 +182,7 @@ struct uniform_partitioned_sequence {
         value_type PISA_ALWAYSINLINE next() {
             ++m_position;
 
-            if (PISA_LIKELY(m_position < m_cur_end)) {
+            if PISA_LIKELY (m_position < m_cur_end) {
                 uint64_t val = m_cur_base + m_partition_enum.next().second;
                 return value_type(m_position, val);
             }
@@ -192,7 +192,7 @@ struct uniform_partitioned_sequence {
         uint64_t size() const { return m_size; }
 
         uint64_t prev_value() const {
-            if (PISA_UNLIKELY(m_position == m_cur_begin)) {
+            if PISA_UNLIKELY (m_position == m_cur_begin) {
                 return m_cur_partition != 0U ? m_cur_base - 1 : 0;
             }
             return m_cur_base + m_partition_enum.prev_value();
@@ -205,7 +205,7 @@ struct uniform_partitioned_sequence {
         // tight loops, on microbenchmarks this causes an improvement of
         // about 3ns on my i7 3Ghz
         value_type PISA_NOINLINE slow_next() {
-            if (PISA_UNLIKELY(m_position == m_size)) {
+            if PISA_UNLIKELY (m_position == m_size) {
                 assert(m_cur_partition == m_partitions - 1);
                 auto val = m_partition_enum.next();
                 assert(val.first == m_partition_enum.size());
diff --git a/include/pisa/topk_queue.hpp b/include/pisa/topk_queue.hpp
index 6cf22bed..80795ffc 100644
--- a/include/pisa/topk_queue.hpp
+++ b/include/pisa/topk_queue.hpp
@@ -43,13 +43,13 @@ struct topk_queue {
     /// If the heap is full, the entry with the lowest value will be removed, i.e.,
     /// the heap will maintain its size.
     auto insert(Score score, DocId docid = 0) -> bool {
-        if (PISA_UNLIKELY(not would_enter(score))) {
+        if PISA_UNLIKELY (not would_enter(score)) {
             return false;
         }
         m_q.emplace_back(score, docid);
-        if (PISA_UNLIKELY(m_q.size() <= m_k)) {
+        if PISA_UNLIKELY (m_q.size() <= m_k) {
             std::push_heap(m_q.begin(), m_q.end(), min_heap_order);
-            if (PISA_UNLIKELY(m_q.size() == m_k)) {
+            if PISA_UNLIKELY (m_q.size() == m_k) {
                 m_effective_threshold = m_q.front().first;
             }
         } else {
diff --git a/include/pisa/util/likely.hpp b/include/pisa/util/likely.hpp
index 6535782e..e224927e 100644
--- a/include/pisa/util/likely.hpp
+++ b/include/pisa/util/likely.hpp
@@ -1,7 +1,19 @@
 #pragma once
 
 //  Likeliness annotations
-#if defined(__GNUC__)
+//
+//  Usage: write the macro in place of the parenthesized condition, e.g.
+//      if PISA_LIKELY (cond) { ... }
+//  With PISA_CXX20 defined the macros expand to `(x) [[likely]]` /
+//  `(x) [[unlikely]]`, i.e. the condition followed by a statement attribute,
+//  so the old spelling `if (PISA_LIKELY(cond))` and any use as a
+//  sub-expression is not valid C++ in that mode.
+//  The __builtin_expect variants are fully parenthesized, so the same
+//  call-site spelling compiles with them as well.
+#if defined(PISA_CXX20)
+    #define PISA_LIKELY(x) (x) [[likely]]
+    #define PISA_UNLIKELY(x) (x) [[unlikely]]
+#elif defined(__GNUC__)
     #define PISA_LIKELY(x) (__builtin_expect(!!(x), 1))
     #define PISA_UNLIKELY(x) (__builtin_expect(!!(x), 0))
 #else