facebookincubator · anandamideShakyan · Feb 4, 2025 · Feb 4, 2025
diff --git a/velox/exec/SortedAggregations.cpp b/velox/exec/SortedAggregations.cpp
@@ -201,9 +201,9 @@ void SortedAggregations::initializeNewGroups(
     char** groups,
     folly::Range<const vector_size_t*> indices) {
   for (auto i : indices) {
-    groups[i][nullByte_] |= nullMask_;
+    groups[i][nullByte_] = static_cast<uint8_t>(groups[i][nullByte_]) | nullMask_;
     new (groups[i] + offset_) RowPointers();
-    groups[i][initializedByte_] |= initializedMask_;
+    groups[i][initializedByte_] = static_cast<uint8_t>(groups[i][initializedByte_]) | initializedMask_;
   }
 
   for (const auto& [sortingSpec, aggregates] : aggregates_) {

diff --git a/velox/exec/Spiller.cpp b/velox/exec/Spiller.cpp
@@ -212,7 +212,7 @@ std::unique_ptr<SpillerBase::SpillStatus> SpillerBase::writeSpill(
   // Target size of a single vector of spilled content. One of
   // these will be materialized at a time for each stream of the
   // merge.
-  constexpr int32_t kTargetBatchBytes = 1 << 18; // 256K
+  constexpr int32_t kTargetBatchBytes = 1u << 18; // 256K
   constexpr int32_t kTargetBatchRows = 64;
 
   RowVectorPtr spillVector;

diff --git a/velox/exec/VectorHasher.cpp b/velox/exec/VectorHasher.cpp
@@ -487,7 +487,7 @@ void VectorHasher::lookupIdsRangeSimd(
         uint64_t outOfRange =
             simd::toBitMask(lower > values) | simd::toBitMask(values > upper);
         if (outOfRange) {
-          bits[index / 64] &= ~(outOfRange << (index & 63));
+          bits[index / 64] &= ~(outOfRange << (static_cast<uint64_t>(index) & 63UL));
         }
         if (outOfRange != bits::lowMask(xsimd::batch<T>::size)) {
           if constexpr (sizeof(T) == 8) {

diff --git a/velox/experimental/wave/vector/WaveVector.cpp b/velox/experimental/wave/vector/WaveVector.cpp
@@ -101,7 +101,7 @@ void WaveVector::toOperand(Operand* operand) const {
     return;
   }
   if (encoding_ == VectorEncoding::Simple::FLAT) {
-    operand->indexMask = ~0;
+    operand->indexMask = ~0u;
     operand->base = values_->as<int64_t>();
     operand->indices = nullptr;
   } else {

diff --git a/velox/expression/CastExpr.cpp b/velox/expression/CastExpr.cpp
@@ -904,10 +904,10 @@ VectorPtr CastExpr::applyIntToBinaryCast(
   applyToSelectedNoThrowLocal(context, rows, result, [&](vector_size_t row) {
     TInput input = simpleInput->valueAt(row);
     if constexpr (std::is_same_v<TInput, int8_t>) {
-      inlined[0] = static_cast<char>(input & 0xFF);
+      inlined[0] = static_cast<char>(input & static_cast<uint64_t>(0xFF));
     } else {
       for (int i = sizeof(TInput) - 1; i >= 0; --i) {
-        inlined[i] = static_cast<char>(input & 0xFF);
+        inlined[i] = static_cast<char>(input & static_cast<uint64_t>(0xFF));
         input >>= 8;
       }
     }

diff --git a/velox/external/md5/md5.cpp b/velox/external/md5/md5.cpp
@@ -167,7 +167,7 @@ namespace facebook::velox::crypto {
         }
         bits[1] += len >> 29;
 
-        t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */
+        t = (t >> 3) & static_cast<uint32_t>(0x3f); /* Bytes already in shsInfo->data */
 
         /* Handle any leading odd-sized chunks */
 
@@ -209,7 +209,7 @@ namespace facebook::velox::crypto {
         unsigned char *p;
 
         /* Compute number of bytes mod 64 */
-        count = (bits[0] >> 3) & 0x3F;
+        count = (bits[0] >> 3) & static_cast<uint32_t>(0x3F);
 
         /* Set the first char of padding to 0x80.  This is safe since there is
            always at least one byte free */
@@ -248,17 +248,17 @@ namespace facebook::velox::crypto {
         int i, j;
 
         for (j = i = 0; i < MD5_HASH_LENGTH_BINARY; i++) {
-            int a = digest[i];
-            zbuf[j++] = HEX_CODES[(a >> 4) & 0xf];
-            zbuf[j++] = HEX_CODES[a & 0xf];
+            uint32_t a = digest[i];
+            zbuf[j++] = HEX_CODES[(a >> 4) & static_cast<uint32_t>(0xf)];
+            zbuf[j++] = HEX_CODES[a & static_cast<uint32_t>(0xf)];
         }
     }
 
     std::string MD5Context::DigestToBase10(const unsigned char* digest) {
       __uint128_t val = 0;
       for (int i = 0; i < MD5_HASH_LENGTH_BINARY; i++) {
-        val = static_cast<__uint128_t>(val << 4) | ((digest[i] >> 4) & 0xf);
-        val = static_cast<__uint128_t>(val << 4) | (digest[i] & 0xf);
+        val = static_cast<__uint128_t>(val << 4) | ((digest[i] >> 4) & static_cast<uint8_t>(0xf));
+        val = static_cast<__uint128_t>(val << 4) | (digest[i] & static_cast<uint8_t>(0xf));
       }
       auto dec = folly::to<std::string>(static_cast<__uint128_t>(val));
       return dec;

diff --git a/velox/external/timsort/TimSort.hpp b/velox/external/timsort/TimSort.hpp
@@ -183,7 +183,7 @@ class TimSort {
 
     diff_t r = 0;
     while (n >= 2 * MIN_MERGE) {
-      r |= (n & 1);
+      r |= (static_cast<uint32_t>(n) & 1u);
       n >>= 1;
     }
     return n + r;

diff --git a/velox/functions/prestosql/Zip.cpp b/velox/functions/prestosql/Zip.cpp
@@ -88,8 +88,11 @@ class ZipFunction : public exec::VectorFunction {
       vector_size_t offset = -1;
       for (int i = 0; i < numInputArrays; i++) {
         vector_size_t size = rawSizes[i][indices[i][row]];
-        allSameSize &= i == 0 || maxSize == size;
-        allSameOffsets &= i == 0 || offset == rawOffsets[i][indices[i][row]];
+        // Use logical && so a flag stays false once any array mismatches.
+        // `a & b == 0 || c` would parse as `(a & (b == 0)) || c`.
+        allSameSize = allSameSize && (i == 0 || maxSize == size);
+        allSameOffsets = allSameOffsets &&
+            (i == 0 || offset == rawOffsets[i][indices[i][row]]);
         maxSize = std::max(maxSize, size);
         offset = rawOffsets[i][indices[i][row]];
       }

diff --git a/velox/functions/prestosql/aggregates/ApproxPercentileAggregate.cpp b/velox/functions/prestosql/aggregates/ApproxPercentileAggregate.cpp
@@ -185,7 +185,7 @@ struct KllSketchAccumulator {
 };
 
 void checkWeight(int64_t weight) {
-  constexpr int64_t kMaxWeight = (1ll << 60) - 1;
+  constexpr int64_t kMaxWeight = (1ULL << 60) - 1;
   VELOX_USER_CHECK(
       1 <= weight && weight <= kMaxWeight,
       "{}: weight must be in range [1, {}], got {}",

diff --git a/velox/functions/sparksql/Hash.cpp b/velox/functions/sparksql/Hash.cpp
@@ -511,14 +511,14 @@ class XxHash64 final {
 
   static uint64_t hashInt32(const int32_t input, uint64_t seed) {
     int64_t hash = seed + PRIME64_5 + 4L;
-    hash ^= static_cast<int64_t>((input & 0xFFFFFFFFL) * PRIME64_1);
+    hash ^= static_cast<int64_t>((static_cast<uint64_t>(input) & static_cast<uint64_t>(0xFFFFFFFFL)) * PRIME64_1);
     hash = bits::rotateLeft64(hash, 23) * PRIME64_2 + PRIME64_3;
     return fmix(hash);
   }
 
   static uint64_t hashInt64(int64_t input, uint64_t seed) {
     int64_t hash = seed + PRIME64_5 + 8L;
-    hash ^= bits::rotateLeft64(input * PRIME64_2, 31) * PRIME64_1;
+    hash = static_cast<uint64_t>(hash) ^ bits::rotateLeft64(input * PRIME64_2, 31) * PRIME64_1;
     hash = bits::rotateLeft64(hash, 27) * PRIME64_1 + PRIME64_4;
     return fmix(hash);
   }
@@ -541,7 +541,7 @@ class XxHash64 final {
 
     uint64_t hash = hashBytesByWords(input, seed);
     uint32_t length = input.size();
-    auto offset = i + (length & -8);
+    auto offset = i + (length & ~7u);
     if (offset + 4L <= end) {
       hash ^= (*reinterpret_cast<const uint64_t*>(offset) & 0xFFFFFFFFL) *
           PRIME64_1;

diff --git a/velox/functions/sparksql/String.cpp b/velox/functions/sparksql/String.cpp
@@ -148,8 +148,8 @@ void encodeDigestToBase16(uint8_t* output, int digestSize) {
   static unsigned char const kHexCodes[] = "0123456789abcdef";
   for (int i = digestSize - 1; i >= 0; --i) {
     int digestChar = output[i];
-    output[i * 2] = kHexCodes[(digestChar >> 4) & 0xf];
-    output[i * 2 + 1] = kHexCodes[digestChar & 0xf];
+    output[i * 2] = kHexCodes[(static_cast<uint32_t>(digestChar) >> 4) & static_cast<uint32_t>(0xf)];
+    output[i * 2 + 1] = kHexCodes[static_cast<uint32_t>(digestChar) & static_cast<uint32_t>(0xf)];
   }
 }
 

diff --git a/velox/functions/sparksql/aggregates/BitwiseXorAggregate.cpp b/velox/functions/sparksql/aggregates/BitwiseXorAggregate.cpp
@@ -54,7 +54,7 @@ class BitwiseXorAggregate : public BitwiseAggregateBase<T> {
         args[0],
         [](T& result, T value) { result ^= value; },
         [](T& result, T value, int n) {
-          if ((n & 1) == 1) {
+          if ((static_cast<uint32_t>(n) & 1u) == 1) {
             result ^= value;
           }
         },

diff --git a/velox/row/UnsafeRowFast.cpp b/velox/row/UnsafeRowFast.cpp
@@ -368,7 +368,7 @@ int32_t UnsafeRowFast::serializeAsArray(
             offset + i, buffer + variableWidthOffset);
 
         // Write size and offset.
-        uint64_t sizeAndOffset = variableWidthOffset << 32 | serializedBytes;
+        uint64_t sizeAndOffset = (static_cast<uint64_t>(variableWidthOffset) << 32) | static_cast<uint64_t>(serializedBytes);
         reinterpret_cast<uint64_t*>(buffer + fixedWidthOffset)[i] =
             sizeAndOffset;
 
@@ -416,7 +416,7 @@ int32_t UnsafeRowFast::serializeRow(vector_size_t index, char* buffer) const {
       auto size = child.serializeVariableWidth(
           childIndex, buffer + variableWidthOffset);
       // Write size and offset.
-      uint64_t sizeAndOffset = variableWidthOffset << 32 | size;
+      uint64_t sizeAndOffset = static_cast<uint64_t>(variableWidthOffset) << 32 | static_cast<uint64_t>(size);
       reinterpret_cast<uint64_t*>(buffer + rowNullBytes_)[i] = sizeAndOffset;
 
       variableWidthOffset += alignBytes(size);

diff --git a/velox/tpch/gen/dbgen/bm_utils.cpp b/velox/tpch/gen/dbgen/bm_utils.cpp
@@ -134,8 +134,8 @@ void tpch_a_rnd(int min, int max, seed_t* seed, char* dest) {
   for (i = 0; i < len; i++) {
     if (i % 5 == 0)
       RANDOM(char_int, 0, MAX_LONG, seed);
-    *(dest + i) = alpha_num[char_int & 077];
-    char_int >>= 6;
+    *(dest + i) = alpha_num[static_cast<uint64_t>(char_int) & static_cast<uint64_t>(077)];
+    char_int = static_cast<uint64_t>(char_int) >> 6;
   }
   *(dest + len) = '\0';
   return;

diff --git a/velox/tpch/gen/dbgen/build.cpp b/velox/tpch/gen/dbgen/build.cpp
@@ -125,11 +125,11 @@ void mk_sparse(DSS_HUGE i, DSS_HUGE* ok, long seq) {
   long low_bits;
 
   *ok = i;
-  low_bits = static_cast<long>(i & ((1 << SPARSE_KEEP) - 1));
-  *ok = *ok >> SPARSE_KEEP;
-  *ok = *ok << SPARSE_BITS;
+  low_bits = static_cast<long>(static_cast<uint64_t>(i) & static_cast<uint64_t>((1UL << SPARSE_KEEP) - 1));
+  *ok = static_cast<uint64_t>(*ok) >> SPARSE_KEEP;
+  *ok = static_cast<uint64_t>(*ok) << SPARSE_BITS;
   *ok += seq;
-  *ok = *ok << SPARSE_KEEP;
+  *ok = static_cast<uint64_t>(*ok) << SPARSE_KEEP;
   *ok += low_bits;
 
   return;

diff --git a/velox/tpch/gen/dbgen/rng64.cpp b/velox/tpch/gen/dbgen/rng64.cpp
@@ -140,14 +140,14 @@ DSS_HUGE AdvanceRand64(DSS_HUGE nSeed, DSS_HUGE nCount) {
   /* */
 
   /* first get the highest non-zero bit */
-  for (nBit = 0; (nCount >> nBit) != RNG_C; nBit++) {
+  for (nBit = 0; (static_cast<uint64_t>(nCount) >> nBit) != RNG_C; nBit++) {
   }
 
   /* go 1 bit at the time */
   while (--nBit >= 0) {
     Dsum *= (Apow + 1);
     Apow = Apow * Apow;
-    if (((nCount >> nBit) % 2) == 1) { /* odd value */
+    if (((static_cast<uint64_t>(nCount) >> nBit) % 2) == 1) { /* odd value */
       Dsum += Apow;
       Apow *= a;
     }

diff --git a/velox/type/Filter.cpp b/velox/type/Filter.cpp
@@ -853,9 +853,9 @@ bool BigintValuesUsingHashTable::testInt64(int64_t value) const {
   if (value < min_ || value > max_) {
     return false;
   }
-  uint32_t pos = (value * M) & sizeMask_;
+  uint32_t pos = (value * M) & static_cast<uint32_t>(sizeMask_);
   for (auto i = pos; i <= pos + sizeMask_; i++) {
-    int32_t idx = i & sizeMask_;
+    int32_t idx = i & static_cast<uint32_t>(sizeMask_);
     int64_t l = hashTable_[idx];
     if (l == kEmptyMarker) {
       return false;
@@ -882,7 +882,7 @@ xsimd::batch_bool<int64_t> BigintValuesUsingHashTable::testValues(
   auto indices = simd::reinterpretBatch<int64_t>(
       simd::reinterpretBatch<uint64_t>(x) * M & sizeMask_);
   auto data =
-      simd::maskGather(allEmpty, ~outOfRange, hashTable_.data(), indices);
+      simd::maskGather(allEmpty, ~static_cast<uint64_t>(outOfRange), hashTable_.data(), indices);
   // The lanes with kEmptyMarker missed, the lanes matching x hit and the other
   // lanes must check next positions.
 
@@ -911,11 +911,11 @@ xsimd::batch_bool<int64_t> BigintValuesUsingHashTable::testValues(
       auto line = xsimd::load_unaligned(hashTable_.data() + index);
 
       if (simd::toBitMask(line == allValue)) {
-        resultBits |= 1 << lane;
+        resultBits = static_cast<uint32_t>(resultBits) | (1u << lane);
         break;
       }
       if (simd::toBitMask(line == allEmpty)) {
-        resultBits &= ~(1 << lane);
+        resultBits = static_cast<uint32_t>(resultBits) & ~(1u << lane);
         break;
       }
       index += line.size;
@@ -935,7 +935,7 @@ xsimd::batch_bool<int32_t> BigintValuesUsingHashTable::testValues(
   auto first = simd::toBitMask(testValues(simd::getHalf<int64_t, 0>(x)));
   auto second = simd::toBitMask(testValues(simd::getHalf<int64_t, 1>(x)));
   return simd::fromBitMask<int32_t>(
-      first | (second << xsimd::batch<int64_t>::size));
+      static_cast<uint32_t>(first) | (static_cast<uint32_t>(second) << xsimd::batch<int64_t>::size));
 }
 
 bool BigintValuesUsingHashTable::testInt64Range(

diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp
@@ -1127,7 +1127,7 @@ bool isAllSameFlat<TypeKind::BOOLEAN>(
   auto* bits = values->as<uint64_t>();
   // Check the all true and all false separately. Easier for compiler if the
   // last argument is constant.
-  if ((bits[0] & 1) == 1) {
+  if ((bits[0] & 1ULL) == 1) {
     return bits ::isAllSet(bits, 0, size, true);
   }
   return bits ::isAllSet(bits, 0, size, false);

diff --git a/velox/vector/arrow/Bridge.cpp b/velox/vector/arrow/Bridge.cpp
@@ -35,7 +35,7 @@ namespace {
 static constexpr size_t kMaxBuffers{3};
 
 void clearNullableFlag(int64_t& flags) {
-  flags = flags & (~ARROW_FLAG_NULLABLE);
+  flags = static_cast<uint64_t>(flags) & (~static_cast<uint64_t>(ARROW_FLAG_NULLABLE));
 }
 
 // Structure that will hold the buffers needed by ArrowArray. This is opaquely