Skip to content

Commit

Permalink
Add support for _mm256_sra_epi16
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillaume Piolat committed May 5, 2024
1 parent 52906ba commit eee97e2
Showing 1 changed file with 39 additions and 1 deletion.
40 changes: 39 additions & 1 deletion source/inteli/avx2intrin.d
Original file line number Diff line number Diff line change
Expand Up @@ -2895,7 +2895,45 @@ unittest
// TODO __m256i _mm256_sllv_epi32 (__m256i a, __m256i count) pure @safe
// TODO __m128i _mm_sllv_epi64 (__m128i a, __m128i count) pure @safe
// TODO __m256i _mm256_sllv_epi64 (__m256i a, __m256i count) pure @safe
// TODO __m256i _mm256_sra_epi16 (__m256i a, __m128i count) pure @safe

/// Arithmetic right shift of each packed 16-bit integer in `a` by `count`,
/// filling the vacated upper bits with copies of the sign bit.
/// The shift amount is one single value, read from the low-order 64-bit of `count`.
/// A shift amount above 15 is defined to produce all sign bits.
/// Warning: `_mm256_srai_epi16` is less of a trap, prefer it.
__m256i _mm256_sra_epi16 (__m256i a, __m128i count) pure @trusted
{
    static if (GDC_or_LDC_with_AVX2)
    {
        // Hand both operands, reinterpreted as 16-bit lanes, to the compiler builtin.
        short16 lanes = cast(short16)a;
        short8 amount = cast(short8)count;
        return cast(__m256i) __builtin_ia32_psraw256(lanes, amount);
    }
    else
    {
        // Shift each 128-bit half with the same count, then reassemble (high half first).
        __m128i upper = _mm_sra_epi16(_mm256_extractf128_si256!1(a), count);
        __m128i lower = _mm_sra_epi16(_mm256_extractf128_si256!0(a), count);
        return _mm256_set_m128i(upper, lower);
    }
}
unittest
{
    // Mix of positive and negative inputs, including short.min.
    __m256i src = _mm256_setr_epi16(4, -9, 11, -32768, 4, -8, 11, -32768,
                                    4, -9, 11, -32768, 4, -8, 11, -32768);

    // Shift by zero: the input must come back unchanged.
    __m128i countZero = _mm_setzero_si128();
    short[16] expectZero = (cast(short16)src).array;
    short16 gotZero = cast(short16) _mm256_sra_epi16(src, countZero);
    assert(gotZero.array == expectZero);

    // Too large shift: every lane collapses to its sign (0 or -1).
    __m128i countHuge = _mm_set1_epi64x(0x8000_0000_0000_0000);
    short[16] expectHuge = [0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1];
    short16 gotHuge = cast(short16) _mm256_sra_epi16(src, countHuge);
    assert(gotHuge.array == expectHuge);

    // Shift by 2: the low 64-bit of the count holds 2, the upper 32-bit lanes (4, 5) must be ignored.
    __m128i countTwo = _mm_setr_epi32(2, 0, 4, 5);
    short[16] expectTwo = [1, -3, 2, -8192, 1, -2, 2, -8192, 1, -3, 2, -8192, 1, -2, 2, -8192];
    short16 gotTwo = cast(short16) _mm256_sra_epi16(src, countTwo);
    assert(gotTwo.array == expectTwo);
}

// TODO __m256i _mm256_sra_epi32 (__m256i a, __m128i count) pure @safe

/// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign bits.
Expand Down

0 comments on commit eee97e2

Please sign in to comment.