diff --git a/generated-src/linux-x86_64/crypto/fipsmodule/aesni-xts-avx512.S b/generated-src/linux-x86_64/crypto/fipsmodule/aesni-xts-avx512.S index b2e31bd9e8..6fe4a2bfc2 100644 --- a/generated-src/linux-x86_64/crypto/fipsmodule/aesni-xts-avx512.S +++ b/generated-src/linux-x86_64/crypto/fipsmodule/aesni-xts-avx512.S @@ -25,92 +25,47 @@ aes_hw_xts_encrypt_avx512: vmovdqu (%r8),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqu (%rcx),%xmm2 - vmovdqa %xmm2,128(%rsp) - vmovdqu 16(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 16(%rcx),%xmm2 - vmovdqa %xmm2,144(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 32(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 32(%rcx),%xmm2 - vmovdqa %xmm2,160(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 48(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 48(%rcx),%xmm2 - vmovdqa %xmm2,176(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 64(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 64(%rcx),%xmm2 - vmovdqa %xmm2,192(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 80(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 80(%rcx),%xmm2 - vmovdqa %xmm2,208(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 96(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 96(%rcx),%xmm2 - vmovdqa %xmm2,224(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 112(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 112(%rcx),%xmm2 - vmovdqa %xmm2,240(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 128(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 128(%rcx),%xmm2 - vmovdqa %xmm2,256(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 144(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 144(%rcx),%xmm2 - vmovdqa %xmm2,272(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 160(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 160(%rcx),%xmm2 - vmovdqa %xmm2,288(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 176(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 176(%rcx),%xmm2 - vmovdqa %xmm2,304(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 192(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 192(%rcx),%xmm2 - vmovdqa %xmm2,320(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 208(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 208(%rcx),%xmm2 - vmovdqa %xmm2,336(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 224(%r8),%xmm0 -.byte 98,242,117,8,221,200 - - vmovdqu 224(%rcx),%xmm2 - vmovdqa %xmm2,352(%rsp) + vaesenclast %xmm0,%xmm1,%xmm1 vmovdqa %xmm1,(%rsp) @@ -155,76 +110,76 @@ aes_hw_xts_encrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -251,76 +206,76 @@ aes_hw_xts_encrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -347,76 +302,76 @@ aes_hw_xts_encrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -442,76 +397,76 @@ aes_hw_xts_encrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -537,66 +492,66 @@ aes_hw_xts_encrypt_avx512: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -616,51 +571,51 @@ aes_hw_xts_encrypt_avx512: addq $0x20,%rdi vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -675,36 +630,36 @@ aes_hw_xts_encrypt_avx512: vmovdqu (%rdi),%xmm1 addq $0x10,%rdi vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 vmovdqu %xmm1,(%rsi) addq $0x10,%rsi @@ -722,20 +677,20 @@ aes_hw_xts_encrypt_avx512: vpshufb %zmm8,%zmm0,%zmm1 vpsllvq const_dq3210(%rip),%zmm0,%zmm4 vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 -.byte 98,147,109,72,68,217,0 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 vpxorq %zmm2,%zmm4,%zmm4{%k2} vpxord %zmm4,%zmm3,%zmm9 vpsllvq const_dq7654(%rip),%zmm0,%zmm5 vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 -.byte 98,147,77,72,68,249,0 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 vpxorq %zmm6,%zmm5,%zmm5{%k2} vpxord %zmm5,%zmm7,%zmm10 vpsrldq $0xf,%zmm9,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm9,%zmm11 vpxord %zmm14,%zmm11,%zmm11 vpsrldq $0xf,%zmm10,%zmm15 -.byte 98,131,5,72,68,193,0 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 vpslldq $0x1,%zmm10,%zmm12 vpxord %zmm16,%zmm12,%zmm12 @@ -749,97 +704,97 @@ aes_hw_xts_encrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 vpxorq %zmm11,%zmm3,%zmm3 vpxorq %zmm12,%zmm4,%zmm4 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 vpxorq %zmm0,%zmm3,%zmm3 vpxorq %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm11,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm11,%zmm15 vpxord %zmm14,%zmm15,%zmm15 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm12,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm12,%zmm16 vpxord %zmm14,%zmm16,%zmm16 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm15,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm15,%zmm17 vpxord %zmm14,%zmm17,%zmm17 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm16,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm16,%zmm18 vpxord %zmm14,%zmm18,%zmm18 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 -.byte 98,242,101,72,221,216 -.byte 98,242,93,72,221,224 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vaesenclast %zmm0,%zmm3,%zmm3 + vaesenclast %zmm0,%zmm4,%zmm4 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm2,%zmm2 vpxorq %zmm11,%zmm3,%zmm3 @@ -870,12 +825,12 @@ aes_hw_xts_encrypt_avx512: vpshufb %zmm8,%zmm0,%zmm1 vpsllvq const_dq3210(%rip),%zmm0,%zmm4 vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 -.byte 98,147,109,72,68,217,0 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 vpxorq %zmm2,%zmm4,%zmm4{%k2} vpxord %zmm4,%zmm3,%zmm9 vpsllvq const_dq7654(%rip),%zmm0,%zmm5 vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 -.byte 98,147,77,72,68,249,0 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 vpxorq %zmm6,%zmm5,%zmm5{%k2} vpxord %zmm5,%zmm7,%zmm10 @@ -888,84 +843,84 @@ aes_hw_xts_encrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 vpsrldq $0xf,%zmm9,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm9,%zmm15 vpxord %zmm14,%zmm15,%zmm15 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 vpsrldq $0xf,%zmm10,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm10,%zmm16 vpxord %zmm14,%zmm16,%zmm16 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -1008,21 +963,21 @@ aes_hw_xts_encrypt_avx512: vpshufb %xmm10,%xmm3,%xmm3 vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 vpxor %xmm0,%xmm3,%xmm8 - vpxor 128(%rsp),%xmm8,%xmm8 -.byte 98,114,61,8,220,132,36,144,0,0,0 -.byte 98,114,61,8,220,132,36,160,0,0,0 -.byte 98,114,61,8,220,132,36,176,0,0,0 -.byte 98,114,61,8,220,132,36,192,0,0,0 -.byte 98,114,61,8,220,132,36,208,0,0,0 -.byte 98,114,61,8,220,132,36,224,0,0,0 -.byte 98,114,61,8,220,132,36,240,0,0,0 -.byte 98,114,61,8,220,132,36,0,1,0,0 -.byte 98,114,61,8,220,132,36,16,1,0,0 -.byte 98,114,61,8,220,132,36,32,1,0,0 -.byte 98,114,61,8,220,132,36,48,1,0,0 -.byte 98,114,61,8,220,132,36,64,1,0,0 -.byte 98,114,61,8,220,132,36,80,1,0,0 -.byte 98,114,61,8,221,132,36,96,1,0,0 + vpxor (%rcx),%xmm8,%xmm8 + vaesenc 16(%rcx),%xmm8,%xmm8 + vaesenc 32(%rcx),%xmm8,%xmm8 + vaesenc 48(%rcx),%xmm8,%xmm8 + vaesenc 64(%rcx),%xmm8,%xmm8 + vaesenc 80(%rcx),%xmm8,%xmm8 + vaesenc 96(%rcx),%xmm8,%xmm8 + vaesenc 112(%rcx),%xmm8,%xmm8 + vaesenc 128(%rcx),%xmm8,%xmm8 + vaesenc 144(%rcx),%xmm8,%xmm8 + vaesenc 160(%rcx),%xmm8,%xmm8 + vaesenc 176(%rcx),%xmm8,%xmm8 + vaesenc 192(%rcx),%xmm8,%xmm8 + vaesenc 208(%rcx),%xmm8,%xmm8 + vaesenclast 224(%rcx),%xmm8,%xmm8 vpxor %xmm0,%xmm8,%xmm8 vmovdqu %xmm8,-16(%rsi) .L_ret_hEgxyDlCngwrfFe: @@ -1076,7 +1031,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1085,7 +1040,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -1094,7 +1049,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -1103,7 +1058,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 xorq %r11,%r11 @@ -1112,7 +1067,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,80(%rsp) - movq %rbx,88(%rsp) + movq %rbx,80 + 8(%rsp) vmovdqa 80(%rsp),%xmm14 vmovdqu 80(%rdi),%xmm6 xorq %r11,%r11 @@ -1121,7 +1076,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,96(%rsp) - movq %rbx,104(%rsp) + movq %rbx,96 + 8(%rsp) vmovdqa 96(%rsp),%xmm15 vmovdqu 96(%rdi),%xmm7 addq $0x70,%rdi @@ -1132,7 +1087,7 @@ aes_hw_xts_encrypt_avx512: vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 vpxor %xmm15,%xmm7,%xmm7 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 @@ -1140,118 +1095,118 @@ aes_hw_xts_encrypt_avx512: vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 vpxor %xmm0,%xmm7,%xmm7 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 -.byte 98,242,93,8,221,224 -.byte 98,242,85,8,221,232 -.byte 98,242,77,8,221,240 -.byte 98,242,69,8,221,248 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vaesenclast %xmm0,%xmm5,%xmm5 + vaesenclast %xmm0,%xmm6,%xmm6 + vaesenclast %xmm0,%xmm7,%xmm7 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1283,7 +1238,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1292,7 +1247,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -1301,7 +1256,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -1310,7 +1265,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 xorq %r11,%r11 @@ -1319,7 +1274,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,80(%rsp) - movq %rbx,88(%rsp) + movq %rbx,80 + 8(%rsp) vmovdqa 80(%rsp),%xmm14 vmovdqu 80(%rdi),%xmm6 addq $0x60,%rdi @@ -1329,111 +1284,111 @@ aes_hw_xts_encrypt_avx512: vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 -.byte 98,242,93,8,221,224 -.byte 98,242,85,8,221,232 -.byte 98,242,77,8,221,240 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vaesenclast %xmm0,%xmm5,%xmm5 + vaesenclast %xmm0,%xmm6,%xmm6 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1463,7 +1418,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1472,7 +1427,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -1481,7 +1436,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -1490,7 +1445,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 addq $0x50,%rdi @@ -1499,96 +1454,96 @@ aes_hw_xts_encrypt_avx512: vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 -.byte 98,242,93,8,221,224 -.byte 98,242,85,8,221,232 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vaesenclast %xmm0,%xmm5,%xmm5 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1616,7 +1571,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1625,7 +1580,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -1634,7 +1589,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 addq $0x40,%rdi @@ -1642,81 +1597,81 @@ aes_hw_xts_encrypt_avx512: vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 -.byte 98,242,93,8,221,224 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1742,7 +1697,7 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1751,73 +1706,73 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 addq $0x30,%rdi vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1841,57 +1796,57 @@ aes_hw_xts_encrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 addq $0x20,%rdi vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -1909,36 +1864,36 @@ aes_hw_xts_encrypt_avx512: vmovdqu 0(%rdi),%xmm1 addq $0x10,%rdi vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 vmovdqu %xmm1,(%rsi) addq $0x10,%rsi @@ -1966,92 +1921,47 @@ aes_hw_xts_decrypt_avx512: vmovdqu (%r8),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqu 224(%rcx),%xmm2 - vmovdqa %xmm2,352(%rsp) - vmovdqu 16(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 208(%rcx),%xmm2 - vmovdqa %xmm2,336(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 32(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 192(%rcx),%xmm2 - vmovdqa %xmm2,320(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 48(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 176(%rcx),%xmm2 - vmovdqa %xmm2,304(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 64(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 160(%rcx),%xmm2 - vmovdqa %xmm2,288(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 80(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 144(%rcx),%xmm2 - vmovdqa %xmm2,272(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 96(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 128(%rcx),%xmm2 - vmovdqa %xmm2,256(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 112(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 112(%rcx),%xmm2 - vmovdqa %xmm2,240(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 128(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 96(%rcx),%xmm2 - vmovdqa %xmm2,224(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 144(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 80(%rcx),%xmm2 - vmovdqa %xmm2,208(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 160(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 64(%rcx),%xmm2 - vmovdqa %xmm2,192(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 176(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 48(%rcx),%xmm2 - vmovdqa %xmm2,176(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 192(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 32(%rcx),%xmm2 - vmovdqa %xmm2,160(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 208(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 16(%rcx),%xmm2 - vmovdqa %xmm2,144(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 224(%r8),%xmm0 -.byte 98,242,117,8,221,200 - - vmovdqu (%rcx),%xmm2 - vmovdqa %xmm2,128(%rsp) + vaesenclast %xmm0,%xmm1,%xmm1 vmovdqa %xmm1,(%rsp) @@ -2084,36 +1994,36 @@ aes_hw_xts_decrypt_avx512: vmovdqu %xmm5,%xmm1 vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 vmovdqu %xmm1,-16(%rsi) vmovdqa %xmm1,%xmm8 @@ -2148,76 +2058,76 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2239,76 +2149,76 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2335,76 +2245,76 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2426,76 +2336,76 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2521,76 +2431,76 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2612,76 +2522,76 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2706,76 +2616,76 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2796,76 +2706,76 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2890,66 +2800,66 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -2967,66 +2877,66 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -3045,51 +2955,51 @@ aes_hw_xts_decrypt_avx512: vextracti32x4 $0x1,%zmm9,%xmm12 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -3103,51 +3013,51 @@ aes_hw_xts_decrypt_avx512: vextracti32x4 $0x1,%zmm9,%xmm10 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -3161,36 +3071,36 @@ aes_hw_xts_decrypt_avx512: je .L_done_1_remain_amivrujEyduiFoi vextracti32x4 $0x1,%zmm9,%xmm11 vpxor %xmm11,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm11,%xmm1,%xmm1 vmovdqu %xmm1,(%rsi) addq $0x10,%rsi @@ -3200,36 +3110,36 @@ aes_hw_xts_decrypt_avx512: .L_done_1_remain_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 vmovdqu %xmm1,(%rsi) jmp .L_ret_amivrujEyduiFoi @@ -3244,25 +3154,25 @@ aes_hw_xts_decrypt_avx512: vpshufb %zmm8,%zmm0,%zmm1 vpsllvq const_dq3210(%rip),%zmm0,%zmm4 vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 -.byte 98,147,109,72,68,217,0 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 vpxorq %zmm2,%zmm4,%zmm4{%k2} vpxord %zmm4,%zmm3,%zmm9 vpsllvq const_dq7654(%rip),%zmm0,%zmm5 vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 -.byte 98,147,77,72,68,249,0 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 vpxorq %zmm6,%zmm5,%zmm5{%k2} vpxord %zmm5,%zmm7,%zmm10 vpsrldq $0xf,%zmm9,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm9,%zmm11 vpxord %zmm14,%zmm11,%zmm11 vpsrldq $0xf,%zmm10,%zmm15 -.byte 98,131,5,72,68,193,0 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 vpslldq $0x1,%zmm10,%zmm12 vpxord %zmm16,%zmm12,%zmm12 @@ -3277,97 +3187,97 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 vpxorq %zmm11,%zmm3,%zmm3 vpxorq %zmm12,%zmm4,%zmm4 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 vpxorq %zmm0,%zmm3,%zmm3 vpxorq %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm11,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm11,%zmm15 vpxord %zmm14,%zmm15,%zmm15 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm12,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm12,%zmm16 vpxord %zmm14,%zmm16,%zmm16 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm15,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm15,%zmm17 vpxord %zmm14,%zmm17,%zmm17 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm16,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm16,%zmm18 vpxord %zmm14,%zmm18,%zmm18 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 -.byte 98,242,101,72,223,216 -.byte 98,242,93,72,223,224 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + vaesdeclast %zmm0,%zmm3,%zmm3 + vaesdeclast %zmm0,%zmm4,%zmm4 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm2,%zmm2 vpxorq %zmm11,%zmm3,%zmm3 @@ -3401,14 +3311,14 @@ aes_hw_xts_decrypt_avx512: vpshufb %zmm8,%zmm0,%zmm1 vpsllvq const_dq3210(%rip),%zmm0,%zmm4 vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 -.byte 98,147,109,72,68,217,0 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 vpxorq %zmm2,%zmm4,%zmm4{%k2} vpxord %zmm4,%zmm3,%zmm9 vpsllvq const_dq7654(%rip),%zmm0,%zmm5 vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 -.byte 98,147,77,72,68,249,0 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 vpxorq %zmm6,%zmm5,%zmm5{%k2} vpxord %zmm5,%zmm7,%zmm10 @@ -3422,84 +3332,84 @@ aes_hw_xts_decrypt_avx512: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 vpsrldq $0xf,%zmm9,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm9,%zmm15 vpxord %zmm14,%zmm15,%zmm15 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 vpsrldq $0xf,%zmm10,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm10,%zmm16 vpxord %zmm14,%zmm16,%zmm16 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -3543,21 +3453,21 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm0,%xmm3,%xmm8 - vpxor 128(%rsp),%xmm8,%xmm8 -.byte 98,114,61,8,222,132,36,144,0,0,0 -.byte 98,114,61,8,222,132,36,160,0,0,0 -.byte 98,114,61,8,222,132,36,176,0,0,0 -.byte 98,114,61,8,222,132,36,192,0,0,0 -.byte 98,114,61,8,222,132,36,208,0,0,0 -.byte 98,114,61,8,222,132,36,224,0,0,0 -.byte 98,114,61,8,222,132,36,240,0,0,0 -.byte 98,114,61,8,222,132,36,0,1,0,0 -.byte 98,114,61,8,222,132,36,16,1,0,0 -.byte 98,114,61,8,222,132,36,32,1,0,0 -.byte 98,114,61,8,222,132,36,48,1,0,0 -.byte 98,114,61,8,222,132,36,64,1,0,0 -.byte 98,114,61,8,222,132,36,80,1,0,0 -.byte 98,114,61,8,223,132,36,96,1,0,0 + vpxor (%rcx),%xmm8,%xmm8 + vaesdec 16(%rcx),%xmm8,%xmm8 + vaesdec 32(%rcx),%xmm8,%xmm8 + vaesdec 48(%rcx),%xmm8,%xmm8 + vaesdec 64(%rcx),%xmm8,%xmm8 + vaesdec 80(%rcx),%xmm8,%xmm8 + vaesdec 96(%rcx),%xmm8,%xmm8 + vaesdec 112(%rcx),%xmm8,%xmm8 + vaesdec 128(%rcx),%xmm8,%xmm8 + vaesdec 144(%rcx),%xmm8,%xmm8 + vaesdec 160(%rcx),%xmm8,%xmm8 + vaesdec 176(%rcx),%xmm8,%xmm8 + vaesdec 192(%rcx),%xmm8,%xmm8 + vaesdec 208(%rcx),%xmm8,%xmm8 + vaesdeclast 224(%rcx),%xmm8,%xmm8 vpxor %xmm0,%xmm8,%xmm8 @@ -3617,7 +3527,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -3626,7 +3536,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -3635,7 +3545,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -3644,7 +3554,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 xorq %r11,%r11 @@ -3653,7 +3563,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,80(%rsp) - movq %rbx,88(%rsp) + movq %rbx,80 + 8(%rsp) vmovdqa 80(%rsp),%xmm14 vmovdqu 80(%rdi),%xmm6 xorq %r11,%r11 @@ -3662,7 +3572,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,96(%rsp) - movq %rbx,104(%rsp) + movq %rbx,96 + 8(%rsp) vmovdqa 96(%rsp),%xmm15 vmovdqu 96(%rdi),%xmm7 addq $0x70,%rdi @@ -3686,7 +3596,7 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 vpxor %xmm15,%xmm7,%xmm7 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 @@ -3694,118 +3604,118 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 vpxor %xmm0,%xmm7,%xmm7 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 -.byte 98,242,77,8,223,240 -.byte 98,242,69,8,223,248 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -3832,7 +3742,7 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 vpxor %xmm15,%xmm7,%xmm7 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 @@ -3840,118 +3750,118 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 vpxor %xmm0,%xmm7,%xmm7 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 -.byte 98,242,77,8,223,240 -.byte 98,242,69,8,223,248 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -3980,7 +3890,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -3989,7 +3899,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -3998,7 +3908,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -4007,7 +3917,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 xorq %r11,%r11 @@ -4016,7 +3926,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,80(%rsp) - movq %rbx,88(%rsp) + movq %rbx,80 + 8(%rsp) vmovdqa 80(%rsp),%xmm14 vmovdqu 80(%rdi),%xmm6 addq $0x60,%rdi @@ -4039,111 +3949,111 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 -.byte 98,242,77,8,223,240 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4167,111 +4077,111 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 -.byte 98,242,77,8,223,240 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4298,7 +4208,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -4307,7 +4217,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -4316,7 +4226,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -4325,7 +4235,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 addq $0x50,%rdi @@ -4347,96 +4257,96 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4457,96 +4367,96 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4571,7 +4481,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -4580,7 +4490,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -4589,7 +4499,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 addq $0x40,%rdi @@ -4610,81 +4520,81 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4702,81 +4612,81 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4799,7 +4709,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -4808,7 +4718,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 addq $0x30,%rdi @@ -4828,66 +4738,66 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4902,66 +4812,66 @@ aes_hw_xts_decrypt_avx512: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4982,7 +4892,7 @@ aes_hw_xts_decrypt_avx512: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 addq $0x20,%rdi @@ -5001,51 +4911,51 @@ aes_hw_xts_decrypt_avx512: vmovdqa 16(%rsp),%xmm10 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -5057,51 +4967,51 @@ aes_hw_xts_decrypt_avx512: .L_done_2_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -5129,36 +5039,36 @@ aes_hw_xts_decrypt_avx512: vmovdqa64 %xmm9,%xmm10 vmovdqa 16(%rsp),%xmm9 vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 addq $0x10,%rsi vmovdqa %xmm10,%xmm0 @@ -5167,36 +5077,36 @@ aes_hw_xts_decrypt_avx512: .L_done_1_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 addq $0x10,%rsi vmovdqa %xmm1,%xmm8 diff --git a/generated-src/mac-x86_64/crypto/fipsmodule/aesni-xts-avx512.S b/generated-src/mac-x86_64/crypto/fipsmodule/aesni-xts-avx512.S index 08a62439ad..578b772761 100644 --- a/generated-src/mac-x86_64/crypto/fipsmodule/aesni-xts-avx512.S +++ b/generated-src/mac-x86_64/crypto/fipsmodule/aesni-xts-avx512.S @@ -25,92 +25,47 @@ _aes_hw_xts_encrypt_avx512: vmovdqu (%r8),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqu (%rcx),%xmm2 - vmovdqa %xmm2,128(%rsp) - vmovdqu 16(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 16(%rcx),%xmm2 - vmovdqa %xmm2,144(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 32(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 32(%rcx),%xmm2 - vmovdqa %xmm2,160(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 48(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 48(%rcx),%xmm2 - vmovdqa %xmm2,176(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 64(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 64(%rcx),%xmm2 - vmovdqa %xmm2,192(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 80(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 80(%rcx),%xmm2 - vmovdqa %xmm2,208(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 96(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 96(%rcx),%xmm2 - vmovdqa %xmm2,224(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 112(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 112(%rcx),%xmm2 - vmovdqa %xmm2,240(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 128(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 128(%rcx),%xmm2 - vmovdqa %xmm2,256(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 144(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 144(%rcx),%xmm2 - vmovdqa %xmm2,272(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 160(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 160(%rcx),%xmm2 - vmovdqa %xmm2,288(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 176(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 176(%rcx),%xmm2 - vmovdqa %xmm2,304(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 192(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 192(%rcx),%xmm2 - vmovdqa %xmm2,320(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 208(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 208(%rcx),%xmm2 - vmovdqa %xmm2,336(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 224(%r8),%xmm0 -.byte 98,242,117,8,221,200 - - vmovdqu 224(%rcx),%xmm2 - vmovdqa %xmm2,352(%rsp) + vaesenclast %xmm0,%xmm1,%xmm1 vmovdqa %xmm1,(%rsp) @@ -155,76 +110,76 @@ L$_remaining_num_blocks_is_7_hEgxyDlCngwrfFe: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -251,76 +206,76 @@ L$_remaining_num_blocks_is_6_hEgxyDlCngwrfFe: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -347,76 +302,76 @@ L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -442,76 +397,76 @@ L$_remaining_num_blocks_is_4_hEgxyDlCngwrfFe: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -537,66 +492,66 @@ L$_remaining_num_blocks_is_3_hEgxyDlCngwrfFe: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -616,51 +571,51 @@ L$_remaining_num_blocks_is_2_hEgxyDlCngwrfFe: addq $0x20,%rdi vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -675,36 +630,36 @@ L$_remaining_num_blocks_is_1_hEgxyDlCngwrfFe: vmovdqu (%rdi),%xmm1 addq $0x10,%rdi vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 vmovdqu %xmm1,(%rsi) addq $0x10,%rsi @@ -722,20 +677,20 @@ L$_start_by16_hEgxyDlCngwrfFe: vpshufb %zmm8,%zmm0,%zmm1 vpsllvq const_dq3210(%rip),%zmm0,%zmm4 vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 -.byte 98,147,109,72,68,217,0 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 vpxorq %zmm2,%zmm4,%zmm4{%k2} vpxord %zmm4,%zmm3,%zmm9 vpsllvq const_dq7654(%rip),%zmm0,%zmm5 vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 -.byte 98,147,77,72,68,249,0 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 vpxorq %zmm6,%zmm5,%zmm5{%k2} vpxord %zmm5,%zmm7,%zmm10 vpsrldq $0xf,%zmm9,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm9,%zmm11 vpxord %zmm14,%zmm11,%zmm11 vpsrldq $0xf,%zmm10,%zmm15 -.byte 98,131,5,72,68,193,0 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 vpslldq $0x1,%zmm10,%zmm12 vpxord %zmm16,%zmm12,%zmm12 @@ -749,97 +704,97 @@ L$_main_loop_run_16_hEgxyDlCngwrfFe: vpxorq %zmm10,%zmm2,%zmm2 vpxorq %zmm11,%zmm3,%zmm3 vpxorq %zmm12,%zmm4,%zmm4 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 vpxorq %zmm0,%zmm3,%zmm3 vpxorq %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm11,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm11,%zmm15 vpxord %zmm14,%zmm15,%zmm15 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm12,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm12,%zmm16 vpxord %zmm14,%zmm16,%zmm16 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm15,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm15,%zmm17 vpxord %zmm14,%zmm17,%zmm17 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm16,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm16,%zmm18 vpxord %zmm14,%zmm18,%zmm18 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 -.byte 98,242,101,72,220,216 -.byte 98,242,93,72,220,224 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 -.byte 98,242,101,72,221,216 -.byte 98,242,93,72,221,224 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 + vaesenc %zmm0,%zmm3,%zmm3 + vaesenc %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 + vaesenclast %zmm0,%zmm3,%zmm3 + vaesenclast %zmm0,%zmm4,%zmm4 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm2,%zmm2 vpxorq %zmm11,%zmm3,%zmm3 @@ -870,12 +825,12 @@ L$_start_by8_hEgxyDlCngwrfFe: vpshufb %zmm8,%zmm0,%zmm1 vpsllvq const_dq3210(%rip),%zmm0,%zmm4 vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 -.byte 98,147,109,72,68,217,0 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 vpxorq %zmm2,%zmm4,%zmm4{%k2} vpxord %zmm4,%zmm3,%zmm9 vpsllvq const_dq7654(%rip),%zmm0,%zmm5 vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 -.byte 98,147,77,72,68,249,0 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 vpxorq %zmm6,%zmm5,%zmm5{%k2} vpxord %zmm5,%zmm7,%zmm10 @@ -888,84 +843,84 @@ L$_main_loop_run_8_hEgxyDlCngwrfFe: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 vpsrldq $0xf,%zmm9,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm9,%zmm15 vpxord %zmm14,%zmm15,%zmm15 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 vpsrldq $0xf,%zmm10,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm10,%zmm16 vpxord %zmm14,%zmm16,%zmm16 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,220,200 -.byte 98,242,109,72,220,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesenc %zmm0,%zmm1,%zmm1 + vaesenc %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,221,200 -.byte 98,242,109,72,221,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesenclast %zmm0,%zmm1,%zmm1 + vaesenclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -1008,21 +963,21 @@ L$_steal_cipher_hEgxyDlCngwrfFe: vpshufb %xmm10,%xmm3,%xmm3 vpblendvb %xmm10,%xmm2,%xmm3,%xmm3 vpxor %xmm0,%xmm3,%xmm8 - vpxor 128(%rsp),%xmm8,%xmm8 -.byte 98,114,61,8,220,132,36,144,0,0,0 -.byte 98,114,61,8,220,132,36,160,0,0,0 -.byte 98,114,61,8,220,132,36,176,0,0,0 -.byte 98,114,61,8,220,132,36,192,0,0,0 -.byte 98,114,61,8,220,132,36,208,0,0,0 -.byte 98,114,61,8,220,132,36,224,0,0,0 -.byte 98,114,61,8,220,132,36,240,0,0,0 -.byte 98,114,61,8,220,132,36,0,1,0,0 -.byte 98,114,61,8,220,132,36,16,1,0,0 -.byte 98,114,61,8,220,132,36,32,1,0,0 -.byte 98,114,61,8,220,132,36,48,1,0,0 -.byte 98,114,61,8,220,132,36,64,1,0,0 -.byte 98,114,61,8,220,132,36,80,1,0,0 -.byte 98,114,61,8,221,132,36,96,1,0,0 + vpxor (%rcx),%xmm8,%xmm8 + vaesenc 16(%rcx),%xmm8,%xmm8 + vaesenc 32(%rcx),%xmm8,%xmm8 + vaesenc 48(%rcx),%xmm8,%xmm8 + vaesenc 64(%rcx),%xmm8,%xmm8 + vaesenc 80(%rcx),%xmm8,%xmm8 + vaesenc 96(%rcx),%xmm8,%xmm8 + vaesenc 112(%rcx),%xmm8,%xmm8 + vaesenc 128(%rcx),%xmm8,%xmm8 + vaesenc 144(%rcx),%xmm8,%xmm8 + vaesenc 160(%rcx),%xmm8,%xmm8 + vaesenc 176(%rcx),%xmm8,%xmm8 + vaesenc 192(%rcx),%xmm8,%xmm8 + vaesenc 208(%rcx),%xmm8,%xmm8 + vaesenclast 224(%rcx),%xmm8,%xmm8 vpxor %xmm0,%xmm8,%xmm8 vmovdqu %xmm8,-16(%rsi) L$_ret_hEgxyDlCngwrfFe: @@ -1076,7 +1031,7 @@ L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1085,7 +1040,7 @@ L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -1094,7 +1049,7 @@ L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -1103,7 +1058,7 @@ L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 xorq %r11,%r11 @@ -1112,7 +1067,7 @@ L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,80(%rsp) - movq %rbx,88(%rsp) + movq %rbx,80 + 8(%rsp) vmovdqa 80(%rsp),%xmm14 vmovdqu 80(%rdi),%xmm6 xorq %r11,%r11 @@ -1121,7 +1076,7 @@ L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,96(%rsp) - movq %rbx,104(%rsp) + movq %rbx,96 + 8(%rsp) vmovdqa 96(%rsp),%xmm15 vmovdqu 96(%rdi),%xmm7 addq $0x70,%rdi @@ -1132,7 +1087,7 @@ L$_num_blocks_is_7_hEgxyDlCngwrfFe: vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 vpxor %xmm15,%xmm7,%xmm7 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 @@ -1140,118 +1095,118 @@ L$_num_blocks_is_7_hEgxyDlCngwrfFe: vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 vpxor %xmm0,%xmm7,%xmm7 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 -.byte 98,242,69,8,220,248 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 -.byte 98,242,93,8,221,224 -.byte 98,242,85,8,221,232 -.byte 98,242,77,8,221,240 -.byte 98,242,69,8,221,248 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vaesenc %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vaesenclast %xmm0,%xmm5,%xmm5 + vaesenclast %xmm0,%xmm6,%xmm6 + vaesenclast %xmm0,%xmm7,%xmm7 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1283,7 +1238,7 @@ L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1292,7 +1247,7 @@ L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -1301,7 +1256,7 @@ L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -1310,7 +1265,7 @@ L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 xorq %r11,%r11 @@ -1319,7 +1274,7 @@ L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,80(%rsp) - movq %rbx,88(%rsp) + movq %rbx,80 + 8(%rsp) vmovdqa 80(%rsp),%xmm14 vmovdqu 80(%rdi),%xmm6 addq $0x60,%rdi @@ -1329,111 +1284,111 @@ L$_num_blocks_is_6_hEgxyDlCngwrfFe: vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 -.byte 98,242,77,8,220,240 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 -.byte 98,242,93,8,221,224 -.byte 98,242,85,8,221,232 -.byte 98,242,77,8,221,240 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vaesenc %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vaesenclast %xmm0,%xmm5,%xmm5 + vaesenclast %xmm0,%xmm6,%xmm6 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1463,7 +1418,7 @@ L$_num_blocks_is_5_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1472,7 +1427,7 @@ L$_num_blocks_is_5_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -1481,7 +1436,7 @@ L$_num_blocks_is_5_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -1490,7 +1445,7 @@ L$_num_blocks_is_5_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 addq $0x50,%rdi @@ -1499,96 +1454,96 @@ L$_num_blocks_is_5_hEgxyDlCngwrfFe: vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 -.byte 98,242,85,8,220,232 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 -.byte 98,242,93,8,221,224 -.byte 98,242,85,8,221,232 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vaesenc %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 + vaesenclast %xmm0,%xmm5,%xmm5 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1616,7 +1571,7 @@ L$_num_blocks_is_4_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1625,7 +1580,7 @@ L$_num_blocks_is_4_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -1634,7 +1589,7 @@ L$_num_blocks_is_4_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 addq $0x40,%rdi @@ -1642,81 +1597,81 @@ L$_num_blocks_is_4_hEgxyDlCngwrfFe: vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 -.byte 98,242,93,8,220,224 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 -.byte 98,242,93,8,221,224 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vaesenc %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 + vaesenclast %xmm0,%xmm4,%xmm4 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1742,7 +1697,7 @@ L$_num_blocks_is_3_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -1751,73 +1706,73 @@ L$_num_blocks_is_3_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 addq $0x30,%rdi vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 -.byte 98,242,101,8,220,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 -.byte 98,242,101,8,221,216 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vaesenc %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 + vaesenclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -1841,57 +1796,57 @@ L$_num_blocks_is_2_hEgxyDlCngwrfFe: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 addq $0x20,%rdi vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 -.byte 98,242,109,8,220,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 -.byte 98,242,109,8,221,208 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vaesenc %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 + vaesenclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -1909,36 +1864,36 @@ L$_num_blocks_is_1_hEgxyDlCngwrfFe: vmovdqu 0(%rdi),%xmm1 addq $0x10,%rdi vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,220,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,221,200 + vmovdqu 16(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesenc %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesenclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 vmovdqu %xmm1,(%rsi) addq $0x10,%rsi @@ -1966,92 +1921,47 @@ _aes_hw_xts_decrypt_avx512: vmovdqu (%r8),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqu 224(%rcx),%xmm2 - vmovdqa %xmm2,352(%rsp) - vmovdqu 16(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 208(%rcx),%xmm2 - vmovdqa %xmm2,336(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 32(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 192(%rcx),%xmm2 - vmovdqa %xmm2,320(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 48(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 176(%rcx),%xmm2 - vmovdqa %xmm2,304(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 64(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 160(%rcx),%xmm2 - vmovdqa %xmm2,288(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 80(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 144(%rcx),%xmm2 - vmovdqa %xmm2,272(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 96(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 128(%rcx),%xmm2 - vmovdqa %xmm2,256(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 112(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 112(%rcx),%xmm2 - vmovdqa %xmm2,240(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 128(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 96(%rcx),%xmm2 - vmovdqa %xmm2,224(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 144(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 80(%rcx),%xmm2 - vmovdqa %xmm2,208(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 160(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 64(%rcx),%xmm2 - vmovdqa %xmm2,192(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 176(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 48(%rcx),%xmm2 - vmovdqa %xmm2,176(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 192(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 32(%rcx),%xmm2 - vmovdqa %xmm2,160(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 208(%r8),%xmm0 -.byte 98,242,117,8,220,200 - - vmovdqu 16(%rcx),%xmm2 - vmovdqa %xmm2,144(%rsp) + vaesenc %xmm0,%xmm1,%xmm1 vmovdqu 224(%r8),%xmm0 -.byte 98,242,117,8,221,200 - - vmovdqu (%rcx),%xmm2 - vmovdqa %xmm2,128(%rsp) + vaesenclast %xmm0,%xmm1,%xmm1 vmovdqa %xmm1,(%rsp) @@ -2084,36 +1994,36 @@ L$_do_n_blocks_amivrujEyduiFoi: vmovdqu %xmm5,%xmm1 vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 vmovdqu %xmm1,-16(%rsi) vmovdqa %xmm1,%xmm8 @@ -2148,76 +2058,76 @@ L$_remaining_num_blocks_is_7_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2239,76 +2149,76 @@ L$_done_7_remain_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2335,76 +2245,76 @@ L$_remaining_num_blocks_is_6_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2426,76 +2336,76 @@ L$_done_6_remain_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2521,76 +2431,76 @@ L$_remaining_num_blocks_is_5_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2612,76 +2522,76 @@ L$_done_5_remain_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2706,76 +2616,76 @@ L$_remaining_num_blocks_is_4_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2796,76 +2706,76 @@ L$_done_4_remain_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -2890,66 +2800,66 @@ L$_remaining_num_blocks_is_3_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -2967,66 +2877,66 @@ L$_done_3_remain_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -3045,51 +2955,51 @@ L$_remaining_num_blocks_is_2_amivrujEyduiFoi: vextracti32x4 $0x1,%zmm9,%xmm12 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -3103,51 +3013,51 @@ L$_done_2_remain_amivrujEyduiFoi: vextracti32x4 $0x1,%zmm9,%xmm10 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -3161,36 +3071,36 @@ L$_remaining_num_blocks_is_1_amivrujEyduiFoi: je L$_done_1_remain_amivrujEyduiFoi vextracti32x4 $0x1,%zmm9,%xmm11 vpxor %xmm11,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm11,%xmm1,%xmm1 vmovdqu %xmm1,(%rsi) addq $0x10,%rsi @@ -3200,36 +3110,36 @@ L$_remaining_num_blocks_is_1_amivrujEyduiFoi: L$_done_1_remain_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 vmovdqu %xmm1,(%rsi) jmp L$_ret_amivrujEyduiFoi @@ -3244,25 +3154,25 @@ L$_start_by16_amivrujEyduiFoi: vpshufb %zmm8,%zmm0,%zmm1 vpsllvq const_dq3210(%rip),%zmm0,%zmm4 vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 -.byte 98,147,109,72,68,217,0 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 vpxorq %zmm2,%zmm4,%zmm4{%k2} vpxord %zmm4,%zmm3,%zmm9 vpsllvq const_dq7654(%rip),%zmm0,%zmm5 vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 -.byte 98,147,77,72,68,249,0 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 vpxorq %zmm6,%zmm5,%zmm5{%k2} vpxord %zmm5,%zmm7,%zmm10 vpsrldq $0xf,%zmm9,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm9,%zmm11 vpxord %zmm14,%zmm11,%zmm11 vpsrldq $0xf,%zmm10,%zmm15 -.byte 98,131,5,72,68,193,0 + vpclmulqdq $0x0,%zmm25,%zmm15,%zmm16 vpslldq $0x1,%zmm10,%zmm12 vpxord %zmm16,%zmm12,%zmm12 @@ -3277,97 +3187,97 @@ L$_main_loop_run_16_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 vpxorq %zmm11,%zmm3,%zmm3 vpxorq %zmm12,%zmm4,%zmm4 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 vpxorq %zmm0,%zmm3,%zmm3 vpxorq %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm11,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm11,%zmm15 vpxord %zmm14,%zmm15,%zmm15 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm12,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm12,%zmm16 vpxord %zmm14,%zmm16,%zmm16 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm15,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm15,%zmm17 vpxord %zmm14,%zmm17,%zmm17 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 vpsrldq $0xf,%zmm16,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm16,%zmm18 vpxord %zmm14,%zmm18,%zmm18 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 -.byte 98,242,101,72,222,216 -.byte 98,242,93,72,222,224 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 -.byte 98,242,101,72,223,216 -.byte 98,242,93,72,223,224 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 + vaesdec %zmm0,%zmm3,%zmm3 + vaesdec %zmm0,%zmm4,%zmm4 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 + vaesdeclast %zmm0,%zmm3,%zmm3 + vaesdeclast %zmm0,%zmm4,%zmm4 vpxorq %zmm9,%zmm1,%zmm1 vpxorq %zmm10,%zmm2,%zmm2 vpxorq %zmm11,%zmm3,%zmm3 @@ -3401,14 +3311,14 @@ L$_start_by8_amivrujEyduiFoi: vpshufb %zmm8,%zmm0,%zmm1 vpsllvq const_dq3210(%rip),%zmm0,%zmm4 vpsrlvq const_dq5678(%rip),%zmm1,%zmm2 -.byte 98,147,109,72,68,217,0 + vpclmulqdq $0x0,%zmm25,%zmm2,%zmm3 vpxorq %zmm2,%zmm4,%zmm4{%k2} vpxord %zmm4,%zmm3,%zmm9 vpsllvq const_dq7654(%rip),%zmm0,%zmm5 vpsrlvq const_dq1234(%rip),%zmm1,%zmm6 -.byte 98,147,77,72,68,249,0 + vpclmulqdq $0x0,%zmm25,%zmm6,%zmm7 vpxorq %zmm6,%zmm5,%zmm5{%k2} vpxord %zmm5,%zmm7,%zmm10 @@ -3422,84 +3332,84 @@ L$_main_loop_run_8_amivrujEyduiFoi: vpxorq %zmm10,%zmm2,%zmm2 - vbroadcasti32x4 128(%rsp),%zmm0 + vbroadcasti32x4 (%rcx),%zmm0 vpxorq %zmm0,%zmm1,%zmm1 vpxorq %zmm0,%zmm2,%zmm2 vpsrldq $0xf,%zmm9,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm9,%zmm15 vpxord %zmm14,%zmm15,%zmm15 - vbroadcasti32x4 144(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 16(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 160(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 32(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 176(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 48(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 vpsrldq $0xf,%zmm10,%zmm13 -.byte 98,19,21,72,68,241,0 + vpclmulqdq $0x0,%zmm25,%zmm13,%zmm14 vpslldq $0x1,%zmm10,%zmm16 vpxord %zmm14,%zmm16,%zmm16 - vbroadcasti32x4 192(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 64(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 208(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 80(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 224(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 96(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 240(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 112(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 256(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 128(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 272(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 144(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 288(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 160(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 304(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 176(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 320(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 192(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 336(%rsp),%zmm0 -.byte 98,242,117,72,222,200 -.byte 98,242,109,72,222,208 + vbroadcasti32x4 208(%rcx),%zmm0 + vaesdec %zmm0,%zmm1,%zmm1 + vaesdec %zmm0,%zmm2,%zmm2 - vbroadcasti32x4 352(%rsp),%zmm0 -.byte 98,242,117,72,223,200 -.byte 98,242,109,72,223,208 + vbroadcasti32x4 224(%rcx),%zmm0 + vaesdeclast %zmm0,%zmm1,%zmm1 + vaesdeclast %zmm0,%zmm2,%zmm2 vpxorq %zmm9,%zmm1,%zmm1 @@ -3543,21 +3453,21 @@ L$_steal_cipher_amivrujEyduiFoi: vpxor %xmm0,%xmm3,%xmm8 - vpxor 128(%rsp),%xmm8,%xmm8 -.byte 98,114,61,8,222,132,36,144,0,0,0 -.byte 98,114,61,8,222,132,36,160,0,0,0 -.byte 98,114,61,8,222,132,36,176,0,0,0 -.byte 98,114,61,8,222,132,36,192,0,0,0 -.byte 98,114,61,8,222,132,36,208,0,0,0 -.byte 98,114,61,8,222,132,36,224,0,0,0 -.byte 98,114,61,8,222,132,36,240,0,0,0 -.byte 98,114,61,8,222,132,36,0,1,0,0 -.byte 98,114,61,8,222,132,36,16,1,0,0 -.byte 98,114,61,8,222,132,36,32,1,0,0 -.byte 98,114,61,8,222,132,36,48,1,0,0 -.byte 98,114,61,8,222,132,36,64,1,0,0 -.byte 98,114,61,8,222,132,36,80,1,0,0 -.byte 98,114,61,8,223,132,36,96,1,0,0 + vpxor (%rcx),%xmm8,%xmm8 + vaesdec 16(%rcx),%xmm8,%xmm8 + vaesdec 32(%rcx),%xmm8,%xmm8 + vaesdec 48(%rcx),%xmm8,%xmm8 + vaesdec 64(%rcx),%xmm8,%xmm8 + vaesdec 80(%rcx),%xmm8,%xmm8 + vaesdec 96(%rcx),%xmm8,%xmm8 + vaesdec 112(%rcx),%xmm8,%xmm8 + vaesdec 128(%rcx),%xmm8,%xmm8 + vaesdec 144(%rcx),%xmm8,%xmm8 + vaesdec 160(%rcx),%xmm8,%xmm8 + vaesdec 176(%rcx),%xmm8,%xmm8 + vaesdec 192(%rcx),%xmm8,%xmm8 + vaesdec 208(%rcx),%xmm8,%xmm8 + vaesdeclast 224(%rcx),%xmm8,%xmm8 vpxor %xmm0,%xmm8,%xmm8 @@ -3617,7 +3527,7 @@ L$_num_blocks_is_7_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -3626,7 +3536,7 @@ L$_num_blocks_is_7_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -3635,7 +3545,7 @@ L$_num_blocks_is_7_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -3644,7 +3554,7 @@ L$_num_blocks_is_7_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 xorq %r11,%r11 @@ -3653,7 +3563,7 @@ L$_num_blocks_is_7_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,80(%rsp) - movq %rbx,88(%rsp) + movq %rbx,80 + 8(%rsp) vmovdqa 80(%rsp),%xmm14 vmovdqu 80(%rdi),%xmm6 xorq %r11,%r11 @@ -3662,7 +3572,7 @@ L$_num_blocks_is_7_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,96(%rsp) - movq %rbx,104(%rsp) + movq %rbx,96 + 8(%rsp) vmovdqa 96(%rsp),%xmm15 vmovdqu 96(%rdi),%xmm7 addq $0x70,%rdi @@ -3686,7 +3596,7 @@ L$_steal_cipher_7_amivrujEyduiFoi: vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 vpxor %xmm15,%xmm7,%xmm7 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 @@ -3694,118 +3604,118 @@ L$_steal_cipher_7_amivrujEyduiFoi: vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 vpxor %xmm0,%xmm7,%xmm7 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 -.byte 98,242,77,8,223,240 -.byte 98,242,69,8,223,248 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -3832,7 +3742,7 @@ L$_done_7_amivrujEyduiFoi: vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 vpxor %xmm15,%xmm7,%xmm7 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 @@ -3840,118 +3750,118 @@ L$_done_7_amivrujEyduiFoi: vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 vpxor %xmm0,%xmm7,%xmm7 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 -.byte 98,242,69,8,222,248 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 -.byte 98,242,77,8,223,240 -.byte 98,242,69,8,223,248 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vaesdec %xmm0,%xmm7,%xmm7 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 + vaesdeclast %xmm0,%xmm7,%xmm7 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -3980,7 +3890,7 @@ L$_num_blocks_is_6_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -3989,7 +3899,7 @@ L$_num_blocks_is_6_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -3998,7 +3908,7 @@ L$_num_blocks_is_6_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -4007,7 +3917,7 @@ L$_num_blocks_is_6_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 xorq %r11,%r11 @@ -4016,7 +3926,7 @@ L$_num_blocks_is_6_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,80(%rsp) - movq %rbx,88(%rsp) + movq %rbx,80 + 8(%rsp) vmovdqa 80(%rsp),%xmm14 vmovdqu 80(%rdi),%xmm6 addq $0x60,%rdi @@ -4039,111 +3949,111 @@ L$_steal_cipher_6_amivrujEyduiFoi: vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 -.byte 98,242,77,8,223,240 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4167,111 +4077,111 @@ L$_done_6_amivrujEyduiFoi: vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 vpxor %xmm14,%xmm6,%xmm6 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 vpxor %xmm0,%xmm6,%xmm6 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 -.byte 98,242,77,8,222,240 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 -.byte 98,242,77,8,223,240 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vaesdec %xmm0,%xmm6,%xmm6 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 + vaesdeclast %xmm0,%xmm6,%xmm6 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4298,7 +4208,7 @@ L$_num_blocks_is_5_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -4307,7 +4217,7 @@ L$_num_blocks_is_5_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -4316,7 +4226,7 @@ L$_num_blocks_is_5_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 xorq %r11,%r11 @@ -4325,7 +4235,7 @@ L$_num_blocks_is_5_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,64(%rsp) - movq %rbx,72(%rsp) + movq %rbx,64 + 8(%rsp) vmovdqa 64(%rsp),%xmm13 vmovdqu 64(%rdi),%xmm5 addq $0x50,%rdi @@ -4347,96 +4257,96 @@ L$_steal_cipher_5_amivrujEyduiFoi: vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4457,96 +4367,96 @@ L$_done_5_amivrujEyduiFoi: vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 vpxor %xmm13,%xmm5,%xmm5 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 vpxor %xmm0,%xmm5,%xmm5 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 -.byte 98,242,85,8,222,232 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 -.byte 98,242,85,8,223,232 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vaesdec %xmm0,%xmm5,%xmm5 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 + vaesdeclast %xmm0,%xmm5,%xmm5 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4571,7 +4481,7 @@ L$_num_blocks_is_4_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -4580,7 +4490,7 @@ L$_num_blocks_is_4_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 xorq %r11,%r11 @@ -4589,7 +4499,7 @@ L$_num_blocks_is_4_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,48(%rsp) - movq %rbx,56(%rsp) + movq %rbx,48 + 8(%rsp) vmovdqa 48(%rsp),%xmm12 vmovdqu 48(%rdi),%xmm4 addq $0x40,%rdi @@ -4610,81 +4520,81 @@ L$_steal_cipher_4_amivrujEyduiFoi: vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4702,81 +4612,81 @@ L$_done_4_amivrujEyduiFoi: vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 vpxor %xmm12,%xmm4,%xmm4 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 vpxor %xmm0,%xmm4,%xmm4 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 -.byte 98,242,93,8,222,224 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 -.byte 98,242,93,8,223,224 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vaesdec %xmm0,%xmm4,%xmm4 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 + vaesdeclast %xmm0,%xmm4,%xmm4 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4799,7 +4709,7 @@ L$_num_blocks_is_3_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 xorq %r11,%r11 @@ -4808,7 +4718,7 @@ L$_num_blocks_is_3_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,32(%rsp) - movq %rbx,40(%rsp) + movq %rbx,32 + 8(%rsp) vmovdqa 32(%rsp),%xmm11 vmovdqu 32(%rdi),%xmm3 addq $0x30,%rdi @@ -4828,66 +4738,66 @@ L$_steal_cipher_3_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4902,66 +4812,66 @@ L$_done_3_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 vpxor %xmm0,%xmm3,%xmm3 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 -.byte 98,242,101,8,222,216 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 -.byte 98,242,101,8,223,216 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vaesdec %xmm0,%xmm3,%xmm3 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 + vaesdeclast %xmm0,%xmm3,%xmm3 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vpxor %xmm11,%xmm3,%xmm3 @@ -4982,7 +4892,7 @@ L$_num_blocks_is_2_amivrujEyduiFoi: cmovcq %r10,%r11 xorq %r11,%rax movq %rax,16(%rsp) - movq %rbx,24(%rsp) + movq %rbx,16 + 8(%rsp) vmovdqa 16(%rsp),%xmm10 vmovdqu 16(%rdi),%xmm2 addq $0x20,%rdi @@ -5001,51 +4911,51 @@ L$_steal_cipher_2_amivrujEyduiFoi: vmovdqa 16(%rsp),%xmm10 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -5057,51 +4967,51 @@ L$_steal_cipher_2_amivrujEyduiFoi: L$_done_2_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 vpxor %xmm0,%xmm2,%xmm2 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 -.byte 98,242,109,8,222,208 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 -.byte 98,242,109,8,223,208 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vaesdec %xmm0,%xmm2,%xmm2 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 + vaesdeclast %xmm0,%xmm2,%xmm2 vpxor %xmm9,%xmm1,%xmm1 vpxor %xmm10,%xmm2,%xmm2 vmovdqu %xmm1,(%rsi) @@ -5129,36 +5039,36 @@ L$_steal_cipher_1_amivrujEyduiFoi: vmovdqa64 %xmm9,%xmm10 vmovdqa 16(%rsp),%xmm9 vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 addq $0x10,%rsi vmovdqa %xmm10,%xmm0 @@ -5167,36 +5077,36 @@ L$_steal_cipher_1_amivrujEyduiFoi: L$_done_1_amivrujEyduiFoi: vpxor %xmm9,%xmm1,%xmm1 - vmovdqa 128(%rsp),%xmm0 + vmovdqu (%rcx),%xmm0 vpxor %xmm0,%xmm1,%xmm1 - vmovdqa 144(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 160(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 176(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 192(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 208(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 224(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 240(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 256(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 272(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 288(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 304(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 320(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 336(%rsp),%xmm0 -.byte 98,242,117,8,222,200 - vmovdqa 352(%rsp),%xmm0 -.byte 98,242,117,8,223,200 + vmovdqu 16(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 32(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 48(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 64(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 80(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 96(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 112(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 128(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 144(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 160(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 176(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 192(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 208(%rcx),%xmm0 + vaesdec %xmm0,%xmm1,%xmm1 + vmovdqu 224(%rcx),%xmm0 + vaesdeclast %xmm0,%xmm1,%xmm1 vpxor %xmm9,%xmm1,%xmm1 addq $0x10,%rsi vmovdqa %xmm1,%xmm8 diff --git a/generated-src/win-x86_64/crypto/fipsmodule/aesni-xts-avx512.asm b/generated-src/win-x86_64/crypto/fipsmodule/aesni-xts-avx512.asm index 039f0c7ac2..b1ea6dc5c3 100644 --- a/generated-src/win-x86_64/crypto/fipsmodule/aesni-xts-avx512.asm +++ b/generated-src/win-x86_64/crypto/fipsmodule/aesni-xts-avx512.asm @@ -42,92 +42,47 @@ DB 243,15,30,250 vmovdqu xmm0,XMMWORD[r10] vpxor xmm1,xmm1,xmm0 - vmovdqu xmm2,XMMWORD[r9] - vmovdqa XMMWORD[128+rsp],xmm2 - vmovdqu xmm0,XMMWORD[16+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[16+r9] - vmovdqa XMMWORD[144+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[32+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[32+r9] - vmovdqa XMMWORD[160+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[48+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[48+r9] - vmovdqa XMMWORD[176+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[64+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[64+r9] - vmovdqa XMMWORD[192+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[80+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[80+r9] - vmovdqa XMMWORD[208+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[96+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[96+r9] - vmovdqa XMMWORD[224+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[112+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[112+r9] - vmovdqa XMMWORD[240+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[128+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[128+r9] - vmovdqa XMMWORD[256+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[144+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[144+r9] - vmovdqa XMMWORD[272+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[160+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[160+r9] - vmovdqa XMMWORD[288+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[176+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[176+r9] - vmovdqa XMMWORD[304+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[192+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[192+r9] - vmovdqa XMMWORD[320+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[208+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[208+r9] - vmovdqa XMMWORD[336+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[224+r10] - DB 98,242,117,8,221,200 - - vmovdqu xmm2,XMMWORD[224+r9] - vmovdqa XMMWORD[352+rsp],xmm2 + vaesenclast xmm1,xmm1,xmm0 vmovdqa XMMWORD[rsp],xmm1 mov QWORD[((8 + 40))+rbp],rcx @@ -174,76 +129,76 @@ $L$_remaining_num_blocks_is_7_hEgxyDlCngwrfFe: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,221,200 - DB 98,242,109,72,221,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -270,76 +225,76 @@ $L$_remaining_num_blocks_is_6_hEgxyDlCngwrfFe: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,221,200 - DB 98,242,109,72,221,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -366,76 +321,76 @@ $L$_remaining_num_blocks_is_5_hEgxyDlCngwrfFe: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,221,200 - DB 98,242,109,72,221,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -461,76 +416,76 @@ $L$_remaining_num_blocks_is_4_hEgxyDlCngwrfFe: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,221,200 - DB 98,242,109,72,221,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -556,66 +511,66 @@ $L$_remaining_num_blocks_is_3_hEgxyDlCngwrfFe: vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 - DB 98,242,109,8,221,208 - DB 98,242,101,8,221,216 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 + vaesenclast xmm2,xmm2,xmm0 + vaesenclast xmm3,xmm3,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -635,51 +590,51 @@ $L$_remaining_num_blocks_is_2_hEgxyDlCngwrfFe: add rcx,0x20 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 - DB 98,242,109,8,221,208 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 + vaesenclast xmm2,xmm2,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vmovdqu XMMWORD[rdx],xmm1 @@ -694,36 +649,36 @@ $L$_remaining_num_blocks_is_1_hEgxyDlCngwrfFe: vmovdqu xmm1,XMMWORD[rcx] add rcx,0x10 vpxor xmm1,xmm1,xmm9 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 vpxor xmm1,xmm1,xmm9 vmovdqu XMMWORD[rdx],xmm1 add rdx,0x10 @@ -741,20 +696,20 @@ $L$_start_by16_hEgxyDlCngwrfFe: vpshufb zmm1,zmm0,zmm8 vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] - DB 98,147,109,72,68,217,0 + vpclmulqdq zmm3,zmm2,zmm25,0x0 vpxorq zmm4{k2},zmm4,zmm2 vpxord zmm9,zmm3,zmm4 vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] - DB 98,147,77,72,68,249,0 + vpclmulqdq zmm7,zmm6,zmm25,0x0 vpxorq zmm5{k2},zmm5,zmm6 vpxord zmm10,zmm7,zmm5 vpsrldq zmm13,zmm9,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm11,zmm9,0x1 vpxord zmm11,zmm11,zmm14 vpsrldq zmm15,zmm10,0xf - DB 98,131,5,72,68,193,0 + vpclmulqdq zmm16,zmm15,zmm25,0x0 vpslldq zmm12,zmm10,0x1 vpxord zmm12,zmm12,zmm16 @@ -768,97 +723,97 @@ $L$_main_loop_run_16_hEgxyDlCngwrfFe: vpxorq zmm2,zmm2,zmm10 vpxorq zmm3,zmm3,zmm11 vpxorq zmm4,zmm4,zmm12 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 vpxorq zmm3,zmm3,zmm0 vpxorq zmm4,zmm4,zmm0 vpsrldq zmm13,zmm11,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm15,zmm11,0x1 vpxord zmm15,zmm15,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 vpsrldq zmm13,zmm12,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm16,zmm12,0x1 vpxord zmm16,zmm16,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 vpsrldq zmm13,zmm15,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm17,zmm15,0x1 vpxord zmm17,zmm17,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 vpsrldq zmm13,zmm16,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm18,zmm16,0x1 vpxord zmm18,zmm18,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 - DB 98,242,101,72,220,216 - DB 98,242,93,72,220,224 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,221,200 - DB 98,242,109,72,221,208 - DB 98,242,101,72,221,216 - DB 98,242,93,72,221,224 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 + vaesenc zmm3,zmm3,zmm0 + vaesenc zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 + vaesenclast zmm3,zmm3,zmm0 + vaesenclast zmm4,zmm4,zmm0 vpxorq zmm1,zmm1,zmm9 vpxorq zmm2,zmm2,zmm10 vpxorq zmm3,zmm3,zmm11 @@ -889,12 +844,12 @@ $L$_start_by8_hEgxyDlCngwrfFe: vpshufb zmm1,zmm0,zmm8 vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] - DB 98,147,109,72,68,217,0 + vpclmulqdq zmm3,zmm2,zmm25,0x0 vpxorq zmm4{k2},zmm4,zmm2 vpxord zmm9,zmm3,zmm4 vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] - DB 98,147,77,72,68,249,0 + vpclmulqdq zmm7,zmm6,zmm25,0x0 vpxorq zmm5{k2},zmm5,zmm6 vpxord zmm10,zmm7,zmm5 @@ -907,84 +862,84 @@ $L$_main_loop_run_8_hEgxyDlCngwrfFe: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 vpsrldq zmm13,zmm9,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm15,zmm9,0x1 vpxord zmm15,zmm15,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 vpsrldq zmm13,zmm10,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm16,zmm10,0x1 vpxord zmm16,zmm16,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,220,200 - DB 98,242,109,72,220,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesenc zmm1,zmm1,zmm0 + vaesenc zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,221,200 - DB 98,242,109,72,221,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesenclast zmm1,zmm1,zmm0 + vaesenclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -1027,21 +982,21 @@ $L$_steal_cipher_hEgxyDlCngwrfFe: vpshufb xmm3,xmm3,xmm10 vpblendvb xmm3,xmm3,xmm2,xmm10 vpxor xmm8,xmm3,xmm0 - vpxor xmm8,xmm8,XMMWORD[128+rsp] - DB 98,114,61,8,220,132,36,144,0,0,0 - DB 98,114,61,8,220,132,36,160,0,0,0 - DB 98,114,61,8,220,132,36,176,0,0,0 - DB 98,114,61,8,220,132,36,192,0,0,0 - DB 98,114,61,8,220,132,36,208,0,0,0 - DB 98,114,61,8,220,132,36,224,0,0,0 - DB 98,114,61,8,220,132,36,240,0,0,0 - DB 98,114,61,8,220,132,36,0,1,0,0 - DB 98,114,61,8,220,132,36,16,1,0,0 - DB 98,114,61,8,220,132,36,32,1,0,0 - DB 98,114,61,8,220,132,36,48,1,0,0 - DB 98,114,61,8,220,132,36,64,1,0,0 - DB 98,114,61,8,220,132,36,80,1,0,0 - DB 98,114,61,8,221,132,36,96,1,0,0 + vpxor xmm8,xmm8,XMMWORD[r9] + vaesenc xmm8,xmm8,XMMWORD[16+r9] + vaesenc xmm8,xmm8,XMMWORD[32+r9] + vaesenc xmm8,xmm8,XMMWORD[48+r9] + vaesenc xmm8,xmm8,XMMWORD[64+r9] + vaesenc xmm8,xmm8,XMMWORD[80+r9] + vaesenc xmm8,xmm8,XMMWORD[96+r9] + vaesenc xmm8,xmm8,XMMWORD[112+r9] + vaesenc xmm8,xmm8,XMMWORD[128+r9] + vaesenc xmm8,xmm8,XMMWORD[144+r9] + vaesenc xmm8,xmm8,XMMWORD[160+r9] + vaesenc xmm8,xmm8,XMMWORD[176+r9] + vaesenc xmm8,xmm8,XMMWORD[192+r9] + vaesenc xmm8,xmm8,XMMWORD[208+r9] + vaesenclast xmm8,xmm8,XMMWORD[224+r9] vpxor xmm8,xmm8,xmm0 vmovdqu XMMWORD[(-16)+rdx],xmm8 $L$_ret_hEgxyDlCngwrfFe: @@ -1122,7 +1077,7 @@ $L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -1131,7 +1086,7 @@ $L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] xor rsi,rsi @@ -1140,7 +1095,7 @@ $L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[48+rsp],rax - mov QWORD[56+rsp],rbx + mov QWORD[((48 + 8))+rsp],rbx vmovdqa xmm12,XMMWORD[48+rsp] vmovdqu xmm4,XMMWORD[48+rcx] xor rsi,rsi @@ -1149,7 +1104,7 @@ $L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[64+rsp],rax - mov QWORD[72+rsp],rbx + mov QWORD[((64 + 8))+rsp],rbx vmovdqa xmm13,XMMWORD[64+rsp] vmovdqu xmm5,XMMWORD[64+rcx] xor rsi,rsi @@ -1158,7 +1113,7 @@ $L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[80+rsp],rax - mov QWORD[88+rsp],rbx + mov QWORD[((80 + 8))+rsp],rbx vmovdqa xmm14,XMMWORD[80+rsp] vmovdqu xmm6,XMMWORD[80+rcx] xor rsi,rsi @@ -1167,7 +1122,7 @@ $L$_num_blocks_is_7_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[96+rsp],rax - mov QWORD[104+rsp],rbx + mov QWORD[((96 + 8))+rsp],rbx vmovdqa xmm15,XMMWORD[96+rsp] vmovdqu xmm7,XMMWORD[96+rcx] add rcx,0x70 @@ -1178,7 +1133,7 @@ $L$_num_blocks_is_7_hEgxyDlCngwrfFe: vpxor xmm5,xmm5,xmm13 vpxor xmm6,xmm6,xmm14 vpxor xmm7,xmm7,xmm15 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 @@ -1186,118 +1141,118 @@ $L$_num_blocks_is_7_hEgxyDlCngwrfFe: vpxor xmm5,xmm5,xmm0 vpxor xmm6,xmm6,xmm0 vpxor xmm7,xmm7,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - DB 98,242,69,8,220,248 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 - DB 98,242,109,8,221,208 - DB 98,242,101,8,221,216 - DB 98,242,93,8,221,224 - DB 98,242,85,8,221,232 - DB 98,242,77,8,221,240 - DB 98,242,69,8,221,248 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vaesenc xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 + vaesenclast xmm2,xmm2,xmm0 + vaesenclast xmm3,xmm3,xmm0 + vaesenclast xmm4,xmm4,xmm0 + vaesenclast xmm5,xmm5,xmm0 + vaesenclast xmm6,xmm6,xmm0 + vaesenclast xmm7,xmm7,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -1329,7 +1284,7 @@ $L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -1338,7 +1293,7 @@ $L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] xor rsi,rsi @@ -1347,7 +1302,7 @@ $L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[48+rsp],rax - mov QWORD[56+rsp],rbx + mov QWORD[((48 + 8))+rsp],rbx vmovdqa xmm12,XMMWORD[48+rsp] vmovdqu xmm4,XMMWORD[48+rcx] xor rsi,rsi @@ -1356,7 +1311,7 @@ $L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[64+rsp],rax - mov QWORD[72+rsp],rbx + mov QWORD[((64 + 8))+rsp],rbx vmovdqa xmm13,XMMWORD[64+rsp] vmovdqu xmm5,XMMWORD[64+rcx] xor rsi,rsi @@ -1365,7 +1320,7 @@ $L$_num_blocks_is_6_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[80+rsp],rax - mov QWORD[88+rsp],rbx + mov QWORD[((80 + 8))+rsp],rbx vmovdqa xmm14,XMMWORD[80+rsp] vmovdqu xmm6,XMMWORD[80+rcx] add rcx,0x60 @@ -1375,111 +1330,111 @@ $L$_num_blocks_is_6_hEgxyDlCngwrfFe: vpxor xmm4,xmm4,xmm12 vpxor xmm5,xmm5,xmm13 vpxor xmm6,xmm6,xmm14 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 vpxor xmm5,xmm5,xmm0 vpxor xmm6,xmm6,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - DB 98,242,77,8,220,240 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 - DB 98,242,109,8,221,208 - DB 98,242,101,8,221,216 - DB 98,242,93,8,221,224 - DB 98,242,85,8,221,232 - DB 98,242,77,8,221,240 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vaesenc xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 + vaesenclast xmm2,xmm2,xmm0 + vaesenclast xmm3,xmm3,xmm0 + vaesenclast xmm4,xmm4,xmm0 + vaesenclast xmm5,xmm5,xmm0 + vaesenclast xmm6,xmm6,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -1509,7 +1464,7 @@ $L$_num_blocks_is_5_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -1518,7 +1473,7 @@ $L$_num_blocks_is_5_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] xor rsi,rsi @@ -1527,7 +1482,7 @@ $L$_num_blocks_is_5_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[48+rsp],rax - mov QWORD[56+rsp],rbx + mov QWORD[((48 + 8))+rsp],rbx vmovdqa xmm12,XMMWORD[48+rsp] vmovdqu xmm4,XMMWORD[48+rcx] xor rsi,rsi @@ -1536,7 +1491,7 @@ $L$_num_blocks_is_5_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[64+rsp],rax - mov QWORD[72+rsp],rbx + mov QWORD[((64 + 8))+rsp],rbx vmovdqa xmm13,XMMWORD[64+rsp] vmovdqu xmm5,XMMWORD[64+rcx] add rcx,0x50 @@ -1545,96 +1500,96 @@ $L$_num_blocks_is_5_hEgxyDlCngwrfFe: vpxor xmm3,xmm3,xmm11 vpxor xmm4,xmm4,xmm12 vpxor xmm5,xmm5,xmm13 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 vpxor xmm5,xmm5,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - DB 98,242,85,8,220,232 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 - DB 98,242,109,8,221,208 - DB 98,242,101,8,221,216 - DB 98,242,93,8,221,224 - DB 98,242,85,8,221,232 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vaesenc xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 + vaesenclast xmm2,xmm2,xmm0 + vaesenclast xmm3,xmm3,xmm0 + vaesenclast xmm4,xmm4,xmm0 + vaesenclast xmm5,xmm5,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -1662,7 +1617,7 @@ $L$_num_blocks_is_4_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -1671,7 +1626,7 @@ $L$_num_blocks_is_4_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] xor rsi,rsi @@ -1680,7 +1635,7 @@ $L$_num_blocks_is_4_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[48+rsp],rax - mov QWORD[56+rsp],rbx + mov QWORD[((48 + 8))+rsp],rbx vmovdqa xmm12,XMMWORD[48+rsp] vmovdqu xmm4,XMMWORD[48+rcx] add rcx,0x40 @@ -1688,81 +1643,81 @@ $L$_num_blocks_is_4_hEgxyDlCngwrfFe: vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 vpxor xmm4,xmm4,xmm12 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - DB 98,242,93,8,220,224 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 - DB 98,242,109,8,221,208 - DB 98,242,101,8,221,216 - DB 98,242,93,8,221,224 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vaesenc xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 + vaesenclast xmm2,xmm2,xmm0 + vaesenclast xmm3,xmm3,xmm0 + vaesenclast xmm4,xmm4,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -1788,7 +1743,7 @@ $L$_num_blocks_is_3_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -1797,73 +1752,73 @@ $L$_num_blocks_is_3_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] add rcx,0x30 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - DB 98,242,101,8,220,216 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 - DB 98,242,109,8,221,208 - DB 98,242,101,8,221,216 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vaesenc xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 + vaesenclast xmm2,xmm2,xmm0 + vaesenclast xmm3,xmm3,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -1887,57 +1842,57 @@ $L$_num_blocks_is_2_hEgxyDlCngwrfFe: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] add rcx,0x20 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - DB 98,242,109,8,220,208 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 - DB 98,242,109,8,221,208 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vaesenc xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 + vaesenclast xmm2,xmm2,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vmovdqu XMMWORD[rdx],xmm1 @@ -1955,36 +1910,36 @@ $L$_num_blocks_is_1_hEgxyDlCngwrfFe: vmovdqu xmm1,XMMWORD[rcx] add rcx,0x10 vpxor xmm1,xmm1,xmm9 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,220,200 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,221,200 + vmovdqu xmm0,XMMWORD[16+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesenc xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesenclast xmm1,xmm1,xmm0 vpxor xmm1,xmm1,xmm9 vmovdqu XMMWORD[rdx],xmm1 add rdx,0x10 @@ -2023,92 +1978,47 @@ DB 243,15,30,250 vmovdqu xmm0,XMMWORD[r10] vpxor xmm1,xmm1,xmm0 - vmovdqu xmm2,XMMWORD[224+r9] - vmovdqa XMMWORD[352+rsp],xmm2 - vmovdqu xmm0,XMMWORD[16+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[208+r9] - vmovdqa XMMWORD[336+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[32+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[192+r9] - vmovdqa XMMWORD[320+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[48+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[176+r9] - vmovdqa XMMWORD[304+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[64+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[160+r9] - vmovdqa XMMWORD[288+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[80+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[144+r9] - vmovdqa XMMWORD[272+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[96+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[128+r9] - vmovdqa XMMWORD[256+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[112+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[112+r9] - vmovdqa XMMWORD[240+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[128+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[96+r9] - vmovdqa XMMWORD[224+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[144+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[80+r9] - vmovdqa XMMWORD[208+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[160+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[64+r9] - vmovdqa XMMWORD[192+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[176+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[48+r9] - vmovdqa XMMWORD[176+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[192+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[32+r9] - vmovdqa XMMWORD[160+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[208+r10] - DB 98,242,117,8,220,200 - - vmovdqu xmm2,XMMWORD[16+r9] - vmovdqa XMMWORD[144+rsp],xmm2 + vaesenc xmm1,xmm1,xmm0 vmovdqu xmm0,XMMWORD[224+r10] - DB 98,242,117,8,221,200 - - vmovdqu xmm2,XMMWORD[r9] - vmovdqa XMMWORD[128+rsp],xmm2 + vaesenclast xmm1,xmm1,xmm0 vmovdqa XMMWORD[rsp],xmm1 mov QWORD[((8 + 40))+rbp],rcx @@ -2143,36 +2053,36 @@ $L$_do_n_blocks_amivrujEyduiFoi: vmovdqu xmm1,xmm5 vpxor xmm1,xmm1,xmm9 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 vpxor xmm1,xmm1,xmm9 vmovdqu XMMWORD[(-16)+rdx],xmm1 vmovdqa xmm8,xmm1 @@ -2207,76 +2117,76 @@ $L$_remaining_num_blocks_is_7_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -2298,76 +2208,76 @@ $L$_done_7_remain_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -2394,76 +2304,76 @@ $L$_remaining_num_blocks_is_6_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -2485,76 +2395,76 @@ $L$_done_6_remain_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -2580,76 +2490,76 @@ $L$_remaining_num_blocks_is_5_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -2671,76 +2581,76 @@ $L$_done_5_remain_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -2765,76 +2675,76 @@ $L$_remaining_num_blocks_is_4_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -2855,76 +2765,76 @@ $L$_done_4_remain_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -2949,66 +2859,66 @@ $L$_remaining_num_blocks_is_3_amivrujEyduiFoi: vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -3026,66 +2936,66 @@ $L$_done_3_remain_amivrujEyduiFoi: vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -3104,51 +3014,51 @@ $L$_remaining_num_blocks_is_2_amivrujEyduiFoi: vextracti32x4 xmm12,zmm9,0x1 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vmovdqu XMMWORD[rdx],xmm1 @@ -3162,51 +3072,51 @@ $L$_done_2_remain_amivrujEyduiFoi: vextracti32x4 xmm10,zmm9,0x1 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vmovdqu XMMWORD[rdx],xmm1 @@ -3220,36 +3130,36 @@ $L$_remaining_num_blocks_is_1_amivrujEyduiFoi: je NEAR $L$_done_1_remain_amivrujEyduiFoi vextracti32x4 xmm11,zmm9,0x1 vpxor xmm1,xmm1,xmm11 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 vpxor xmm1,xmm1,xmm11 vmovdqu XMMWORD[rdx],xmm1 add rdx,0x10 @@ -3259,36 +3169,36 @@ $L$_remaining_num_blocks_is_1_amivrujEyduiFoi: $L$_done_1_remain_amivrujEyduiFoi: vpxor xmm1,xmm1,xmm9 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 vpxor xmm1,xmm1,xmm9 vmovdqu XMMWORD[rdx],xmm1 jmp NEAR $L$_ret_amivrujEyduiFoi @@ -3303,25 +3213,25 @@ $L$_start_by16_amivrujEyduiFoi: vpshufb zmm1,zmm0,zmm8 vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] - DB 98,147,109,72,68,217,0 + vpclmulqdq zmm3,zmm2,zmm25,0x0 vpxorq zmm4{k2},zmm4,zmm2 vpxord zmm9,zmm3,zmm4 vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] - DB 98,147,77,72,68,249,0 + vpclmulqdq zmm7,zmm6,zmm25,0x0 vpxorq zmm5{k2},zmm5,zmm6 vpxord zmm10,zmm7,zmm5 vpsrldq zmm13,zmm9,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm11,zmm9,0x1 vpxord zmm11,zmm11,zmm14 vpsrldq zmm15,zmm10,0xf - DB 98,131,5,72,68,193,0 + vpclmulqdq zmm16,zmm15,zmm25,0x0 vpslldq zmm12,zmm10,0x1 vpxord zmm12,zmm12,zmm16 @@ -3336,97 +3246,97 @@ $L$_main_loop_run_16_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 vpxorq zmm3,zmm3,zmm11 vpxorq zmm4,zmm4,zmm12 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 vpxorq zmm3,zmm3,zmm0 vpxorq zmm4,zmm4,zmm0 vpsrldq zmm13,zmm11,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm15,zmm11,0x1 vpxord zmm15,zmm15,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 vpsrldq zmm13,zmm12,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm16,zmm12,0x1 vpxord zmm16,zmm16,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 vpsrldq zmm13,zmm15,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm17,zmm15,0x1 vpxord zmm17,zmm17,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 vpsrldq zmm13,zmm16,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm18,zmm16,0x1 vpxord zmm18,zmm18,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 - DB 98,242,101,72,222,216 - DB 98,242,93,72,222,224 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 - DB 98,242,101,72,223,216 - DB 98,242,93,72,223,224 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 + vaesdec zmm3,zmm3,zmm0 + vaesdec zmm4,zmm4,zmm0 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 + vaesdeclast zmm3,zmm3,zmm0 + vaesdeclast zmm4,zmm4,zmm0 vpxorq zmm1,zmm1,zmm9 vpxorq zmm2,zmm2,zmm10 vpxorq zmm3,zmm3,zmm11 @@ -3460,14 +3370,14 @@ $L$_start_by8_amivrujEyduiFoi: vpshufb zmm1,zmm0,zmm8 vpsllvq zmm4,zmm0,ZMMWORD[const_dq3210] vpsrlvq zmm2,zmm1,ZMMWORD[const_dq5678] - DB 98,147,109,72,68,217,0 + vpclmulqdq zmm3,zmm2,zmm25,0x0 vpxorq zmm4{k2},zmm4,zmm2 vpxord zmm9,zmm3,zmm4 vpsllvq zmm5,zmm0,ZMMWORD[const_dq7654] vpsrlvq zmm6,zmm1,ZMMWORD[const_dq1234] - DB 98,147,77,72,68,249,0 + vpclmulqdq zmm7,zmm6,zmm25,0x0 vpxorq zmm5{k2},zmm5,zmm6 vpxord zmm10,zmm7,zmm5 @@ -3481,84 +3391,84 @@ $L$_main_loop_run_8_amivrujEyduiFoi: vpxorq zmm2,zmm2,zmm10 - vbroadcasti32x4 zmm0,ZMMWORD[128+rsp] + vbroadcasti32x4 zmm0,ZMMWORD[r9] vpxorq zmm1,zmm1,zmm0 vpxorq zmm2,zmm2,zmm0 vpsrldq zmm13,zmm9,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm15,zmm9,0x1 vpxord zmm15,zmm15,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[144+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[16+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[160+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[32+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[176+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[48+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 vpsrldq zmm13,zmm10,0xf - DB 98,19,21,72,68,241,0 + vpclmulqdq zmm14,zmm13,zmm25,0x0 vpslldq zmm16,zmm10,0x1 vpxord zmm16,zmm16,zmm14 - vbroadcasti32x4 zmm0,ZMMWORD[192+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[64+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[208+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[80+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[224+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[96+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[240+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[112+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[256+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[128+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[272+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[144+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[288+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[160+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[304+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[176+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[320+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[192+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[336+rsp] - DB 98,242,117,72,222,200 - DB 98,242,109,72,222,208 + vbroadcasti32x4 zmm0,ZMMWORD[208+r9] + vaesdec zmm1,zmm1,zmm0 + vaesdec zmm2,zmm2,zmm0 - vbroadcasti32x4 zmm0,ZMMWORD[352+rsp] - DB 98,242,117,72,223,200 - DB 98,242,109,72,223,208 + vbroadcasti32x4 zmm0,ZMMWORD[224+r9] + vaesdeclast zmm1,zmm1,zmm0 + vaesdeclast zmm2,zmm2,zmm0 vpxorq zmm1,zmm1,zmm9 @@ -3602,21 +3512,21 @@ $L$_steal_cipher_amivrujEyduiFoi: vpxor xmm8,xmm3,xmm0 - vpxor xmm8,xmm8,XMMWORD[128+rsp] - DB 98,114,61,8,222,132,36,144,0,0,0 - DB 98,114,61,8,222,132,36,160,0,0,0 - DB 98,114,61,8,222,132,36,176,0,0,0 - DB 98,114,61,8,222,132,36,192,0,0,0 - DB 98,114,61,8,222,132,36,208,0,0,0 - DB 98,114,61,8,222,132,36,224,0,0,0 - DB 98,114,61,8,222,132,36,240,0,0,0 - DB 98,114,61,8,222,132,36,0,1,0,0 - DB 98,114,61,8,222,132,36,16,1,0,0 - DB 98,114,61,8,222,132,36,32,1,0,0 - DB 98,114,61,8,222,132,36,48,1,0,0 - DB 98,114,61,8,222,132,36,64,1,0,0 - DB 98,114,61,8,222,132,36,80,1,0,0 - DB 98,114,61,8,223,132,36,96,1,0,0 + vpxor xmm8,xmm8,XMMWORD[r9] + vaesdec xmm8,xmm8,XMMWORD[16+r9] + vaesdec xmm8,xmm8,XMMWORD[32+r9] + vaesdec xmm8,xmm8,XMMWORD[48+r9] + vaesdec xmm8,xmm8,XMMWORD[64+r9] + vaesdec xmm8,xmm8,XMMWORD[80+r9] + vaesdec xmm8,xmm8,XMMWORD[96+r9] + vaesdec xmm8,xmm8,XMMWORD[112+r9] + vaesdec xmm8,xmm8,XMMWORD[128+r9] + vaesdec xmm8,xmm8,XMMWORD[144+r9] + vaesdec xmm8,xmm8,XMMWORD[160+r9] + vaesdec xmm8,xmm8,XMMWORD[176+r9] + vaesdec xmm8,xmm8,XMMWORD[192+r9] + vaesdec xmm8,xmm8,XMMWORD[208+r9] + vaesdeclast xmm8,xmm8,XMMWORD[224+r9] vpxor xmm8,xmm8,xmm0 @@ -3703,7 +3613,7 @@ $L$_num_blocks_is_7_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -3712,7 +3622,7 @@ $L$_num_blocks_is_7_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] xor rsi,rsi @@ -3721,7 +3631,7 @@ $L$_num_blocks_is_7_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[48+rsp],rax - mov QWORD[56+rsp],rbx + mov QWORD[((48 + 8))+rsp],rbx vmovdqa xmm12,XMMWORD[48+rsp] vmovdqu xmm4,XMMWORD[48+rcx] xor rsi,rsi @@ -3730,7 +3640,7 @@ $L$_num_blocks_is_7_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[64+rsp],rax - mov QWORD[72+rsp],rbx + mov QWORD[((64 + 8))+rsp],rbx vmovdqa xmm13,XMMWORD[64+rsp] vmovdqu xmm5,XMMWORD[64+rcx] xor rsi,rsi @@ -3739,7 +3649,7 @@ $L$_num_blocks_is_7_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[80+rsp],rax - mov QWORD[88+rsp],rbx + mov QWORD[((80 + 8))+rsp],rbx vmovdqa xmm14,XMMWORD[80+rsp] vmovdqu xmm6,XMMWORD[80+rcx] xor rsi,rsi @@ -3748,7 +3658,7 @@ $L$_num_blocks_is_7_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[96+rsp],rax - mov QWORD[104+rsp],rbx + mov QWORD[((96 + 8))+rsp],rbx vmovdqa xmm15,XMMWORD[96+rsp] vmovdqu xmm7,XMMWORD[96+rcx] add rcx,0x70 @@ -3772,7 +3682,7 @@ $L$_steal_cipher_7_amivrujEyduiFoi: vpxor xmm5,xmm5,xmm13 vpxor xmm6,xmm6,xmm14 vpxor xmm7,xmm7,xmm15 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 @@ -3780,118 +3690,118 @@ $L$_steal_cipher_7_amivrujEyduiFoi: vpxor xmm5,xmm5,xmm0 vpxor xmm6,xmm6,xmm0 vpxor xmm7,xmm7,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 - DB 98,242,93,8,223,224 - DB 98,242,85,8,223,232 - DB 98,242,77,8,223,240 - DB 98,242,69,8,223,248 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vaesdeclast xmm7,xmm7,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -3918,7 +3828,7 @@ $L$_done_7_amivrujEyduiFoi: vpxor xmm5,xmm5,xmm13 vpxor xmm6,xmm6,xmm14 vpxor xmm7,xmm7,xmm15 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 @@ -3926,118 +3836,118 @@ $L$_done_7_amivrujEyduiFoi: vpxor xmm5,xmm5,xmm0 vpxor xmm6,xmm6,xmm0 vpxor xmm7,xmm7,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - DB 98,242,69,8,222,248 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 - DB 98,242,93,8,223,224 - DB 98,242,85,8,223,232 - DB 98,242,77,8,223,240 - DB 98,242,69,8,223,248 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vaesdec xmm7,xmm7,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 + vaesdeclast xmm7,xmm7,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -4066,7 +3976,7 @@ $L$_num_blocks_is_6_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -4075,7 +3985,7 @@ $L$_num_blocks_is_6_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] xor rsi,rsi @@ -4084,7 +3994,7 @@ $L$_num_blocks_is_6_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[48+rsp],rax - mov QWORD[56+rsp],rbx + mov QWORD[((48 + 8))+rsp],rbx vmovdqa xmm12,XMMWORD[48+rsp] vmovdqu xmm4,XMMWORD[48+rcx] xor rsi,rsi @@ -4093,7 +4003,7 @@ $L$_num_blocks_is_6_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[64+rsp],rax - mov QWORD[72+rsp],rbx + mov QWORD[((64 + 8))+rsp],rbx vmovdqa xmm13,XMMWORD[64+rsp] vmovdqu xmm5,XMMWORD[64+rcx] xor rsi,rsi @@ -4102,7 +4012,7 @@ $L$_num_blocks_is_6_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[80+rsp],rax - mov QWORD[88+rsp],rbx + mov QWORD[((80 + 8))+rsp],rbx vmovdqa xmm14,XMMWORD[80+rsp] vmovdqu xmm6,XMMWORD[80+rcx] add rcx,0x60 @@ -4125,111 +4035,111 @@ $L$_steal_cipher_6_amivrujEyduiFoi: vpxor xmm4,xmm4,xmm12 vpxor xmm5,xmm5,xmm13 vpxor xmm6,xmm6,xmm14 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 vpxor xmm5,xmm5,xmm0 vpxor xmm6,xmm6,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 - DB 98,242,93,8,223,224 - DB 98,242,85,8,223,232 - DB 98,242,77,8,223,240 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -4253,111 +4163,111 @@ $L$_done_6_amivrujEyduiFoi: vpxor xmm4,xmm4,xmm12 vpxor xmm5,xmm5,xmm13 vpxor xmm6,xmm6,xmm14 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 vpxor xmm5,xmm5,xmm0 vpxor xmm6,xmm6,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - DB 98,242,77,8,222,240 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 - DB 98,242,93,8,223,224 - DB 98,242,85,8,223,232 - DB 98,242,77,8,223,240 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vaesdec xmm6,xmm6,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 + vaesdeclast xmm6,xmm6,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -4384,7 +4294,7 @@ $L$_num_blocks_is_5_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -4393,7 +4303,7 @@ $L$_num_blocks_is_5_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] xor rsi,rsi @@ -4402,7 +4312,7 @@ $L$_num_blocks_is_5_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[48+rsp],rax - mov QWORD[56+rsp],rbx + mov QWORD[((48 + 8))+rsp],rbx vmovdqa xmm12,XMMWORD[48+rsp] vmovdqu xmm4,XMMWORD[48+rcx] xor rsi,rsi @@ -4411,7 +4321,7 @@ $L$_num_blocks_is_5_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[64+rsp],rax - mov QWORD[72+rsp],rbx + mov QWORD[((64 + 8))+rsp],rbx vmovdqa xmm13,XMMWORD[64+rsp] vmovdqu xmm5,XMMWORD[64+rcx] add rcx,0x50 @@ -4433,96 +4343,96 @@ $L$_steal_cipher_5_amivrujEyduiFoi: vpxor xmm3,xmm3,xmm11 vpxor xmm4,xmm4,xmm12 vpxor xmm5,xmm5,xmm13 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 vpxor xmm5,xmm5,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 - DB 98,242,93,8,223,224 - DB 98,242,85,8,223,232 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -4543,96 +4453,96 @@ $L$_done_5_amivrujEyduiFoi: vpxor xmm3,xmm3,xmm11 vpxor xmm4,xmm4,xmm12 vpxor xmm5,xmm5,xmm13 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 vpxor xmm5,xmm5,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - DB 98,242,85,8,222,232 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 - DB 98,242,93,8,223,224 - DB 98,242,85,8,223,232 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vaesdec xmm5,xmm5,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 + vaesdeclast xmm5,xmm5,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -4657,7 +4567,7 @@ $L$_num_blocks_is_4_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -4666,7 +4576,7 @@ $L$_num_blocks_is_4_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] xor rsi,rsi @@ -4675,7 +4585,7 @@ $L$_num_blocks_is_4_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[48+rsp],rax - mov QWORD[56+rsp],rbx + mov QWORD[((48 + 8))+rsp],rbx vmovdqa xmm12,XMMWORD[48+rsp] vmovdqu xmm4,XMMWORD[48+rcx] add rcx,0x40 @@ -4696,81 +4606,81 @@ $L$_steal_cipher_4_amivrujEyduiFoi: vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 vpxor xmm4,xmm4,xmm12 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 - DB 98,242,93,8,223,224 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -4788,81 +4698,81 @@ $L$_done_4_amivrujEyduiFoi: vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 vpxor xmm4,xmm4,xmm12 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 vpxor xmm4,xmm4,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - DB 98,242,93,8,222,224 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 - DB 98,242,93,8,223,224 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vaesdec xmm4,xmm4,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 + vaesdeclast xmm4,xmm4,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -4885,7 +4795,7 @@ $L$_num_blocks_is_3_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] xor rsi,rsi @@ -4894,7 +4804,7 @@ $L$_num_blocks_is_3_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[32+rsp],rax - mov QWORD[40+rsp],rbx + mov QWORD[((32 + 8))+rsp],rbx vmovdqa xmm11,XMMWORD[32+rsp] vmovdqu xmm3,XMMWORD[32+rcx] add rcx,0x30 @@ -4914,66 +4824,66 @@ $L$_steal_cipher_3_amivrujEyduiFoi: vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -4988,66 +4898,66 @@ $L$_done_3_amivrujEyduiFoi: vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 vpxor xmm3,xmm3,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - DB 98,242,101,8,222,216 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 - DB 98,242,101,8,223,216 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vaesdec xmm3,xmm3,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 + vaesdeclast xmm3,xmm3,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vpxor xmm3,xmm3,xmm11 @@ -5068,7 +4978,7 @@ $L$_num_blocks_is_2_amivrujEyduiFoi: cmovc rsi,rdi xor rax,rsi mov QWORD[16+rsp],rax - mov QWORD[24+rsp],rbx + mov QWORD[((16 + 8))+rsp],rbx vmovdqa xmm10,XMMWORD[16+rsp] vmovdqu xmm2,XMMWORD[16+rcx] add rcx,0x20 @@ -5087,51 +4997,51 @@ $L$_steal_cipher_2_amivrujEyduiFoi: vmovdqa xmm10,XMMWORD[16+rsp] vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vmovdqu XMMWORD[rdx],xmm1 @@ -5143,51 +5053,51 @@ $L$_steal_cipher_2_amivrujEyduiFoi: $L$_done_2_amivrujEyduiFoi: vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 vpxor xmm2,xmm2,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - DB 98,242,109,8,222,208 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 - DB 98,242,109,8,223,208 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vaesdec xmm2,xmm2,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 + vaesdeclast xmm2,xmm2,xmm0 vpxor xmm1,xmm1,xmm9 vpxor xmm2,xmm2,xmm10 vmovdqu XMMWORD[rdx],xmm1 @@ -5215,36 +5125,36 @@ $L$_steal_cipher_1_amivrujEyduiFoi: vmovdqa64 xmm10,xmm9 vmovdqa xmm9,XMMWORD[16+rsp] vpxor xmm1,xmm1,xmm9 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 vpxor xmm1,xmm1,xmm9 add rdx,0x10 vmovdqa xmm0,xmm10 @@ -5253,36 +5163,36 @@ $L$_steal_cipher_1_amivrujEyduiFoi: $L$_done_1_amivrujEyduiFoi: vpxor xmm1,xmm1,xmm9 - vmovdqa xmm0,XMMWORD[128+rsp] + vmovdqu xmm0,XMMWORD[r9] vpxor xmm1,xmm1,xmm0 - vmovdqa xmm0,XMMWORD[144+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[160+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[176+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[192+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[208+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[224+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[240+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[256+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[272+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[288+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[304+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[320+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[336+rsp] - DB 98,242,117,8,222,200 - vmovdqa xmm0,XMMWORD[352+rsp] - DB 98,242,117,8,223,200 + vmovdqu xmm0,XMMWORD[16+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[32+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[48+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[64+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[80+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[96+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[112+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[128+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[144+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[160+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[176+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[192+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[208+r9] + vaesdec xmm1,xmm1,xmm0 + vmovdqu xmm0,XMMWORD[224+r9] + vaesdeclast xmm1,xmm1,xmm0 vpxor xmm1,xmm1,xmm9 add rdx,0x10 vmovdqa xmm8,xmm1 diff --git a/util/fipstools/delocate/delocate.peg.go b/util/fipstools/delocate/delocate.peg.go index 30b59865d6..de6b6b6f57 100644 --- a/util/fipstools/delocate/delocate.peg.go +++ b/util/fipstools/delocate/delocate.peg.go @@ -7224,7 +7224,7 @@ func (p *Asm) Init(options ...func(*Asm) error) error { position, tokenIndex = position890, tokenIndex890 return false }, - /* 57 Offset <- <('+'? '-'? (('0' ('b' / 'B') ('0' / '1')+) / ('0' ('x' / 'X') ([0-9] / [0-9] / ([a-f] / [A-F]))+) / ((([0-9]+ WS OffsetOperator [0-9]+) / ([0-9]+ (OffsetOperator '(' [0-9]+ OffsetOperator [0-9]+ ')')?) / ([0-9]+ (OffsetOperator [0-9]+ OffsetOperator [0-9]+)?) / ([0-9]+ (OffsetOperator [0-9]+)?) / ('(' [0-9]+ WS? OffsetOperator WS? [0-9]+ ')' OffsetOperator [0-9]+ OffsetOperator [0-9]+) / ('(' [0-9]+ WS? OffsetOperator WS? [0-9]+ ')' OffsetOperator [0-9]+ !'x') / ('(' [0-9]+ WS? OffsetOperator WS? [0-9]+ ')') / ('(' [0-9]+ WS? OffsetOperator WS? [0-9]+ WS? OffsetOperator WS? [0-9]+ ')')) !([a-z] / [A-Z]))))> */ + /* 57 Offset <- <('+'? '-'? (('0' ('b' / 'B') ('0' / '1')+) / ('0' ('x' / 'X') ([0-9] / [0-9] / ([a-f] / [A-F]))+) / ((([0-9]+ WS OffsetOperator WS? [0-9]+) / ([0-9]+ (OffsetOperator '(' [0-9]+ OffsetOperator [0-9]+ ')')?) / ([0-9]+ (OffsetOperator [0-9]+ OffsetOperator [0-9]+)?) / ([0-9]+ (OffsetOperator [0-9]+)?) / ('(' [0-9]+ WS? OffsetOperator WS? [0-9]+ ')' OffsetOperator [0-9]+ OffsetOperator [0-9]+) / ('(' [0-9]+ WS? OffsetOperator WS? [0-9]+ ')' OffsetOperator [0-9]+ !'x') / ('(' [0-9]+ WS? OffsetOperator WS? [0-9]+ ')') / ('(' [0-9]+ WS? OffsetOperator WS? [0-9]+ WS? OffsetOperator WS? [0-9]+ ')')) !([a-z] / [A-Z]))))> */ func() bool { if memoized, ok := memoization[memoKey{57, position}]; ok { return memoizedResult(memoized) @@ -7433,228 +7433,225 @@ func (p *Asm) Init(options ...func(*Asm) error) error { if !_rules[ruleOffsetOperator]() { goto l927 } + { + position930, tokenIndex930 := position, tokenIndex + if !_rules[ruleWS]() { + goto l930 + } + goto l931 + l930: + position, tokenIndex = position930, tokenIndex930 + } + l931: if c := buffer[position]; c < rune('0') || c > rune('9') { goto l927 } position++ - l930: + l932: { - position931, tokenIndex931 := position, tokenIndex + position933, tokenIndex933 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l931 + goto l933 } position++ - goto l930 - l931: - position, tokenIndex = position931, tokenIndex931 + goto l932 + l933: + position, tokenIndex = position933, tokenIndex933 } goto l926 l927: position, tokenIndex = position926, tokenIndex926 if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l932 + goto l934 } position++ - l933: + l935: { - position934, tokenIndex934 := position, tokenIndex + position936, tokenIndex936 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l934 + goto l936 } position++ - goto l933 - l934: - position, tokenIndex = position934, tokenIndex934 + goto l935 + l936: + position, tokenIndex = position936, tokenIndex936 } { - position935, tokenIndex935 := position, tokenIndex + position937, tokenIndex937 := position, tokenIndex if !_rules[ruleOffsetOperator]() { - goto l935 + goto l937 } if buffer[position] != rune('(') { - goto l935 + goto l937 } position++ if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l935 + goto l937 } position++ - l937: + l939: { - position938, tokenIndex938 := position, tokenIndex + position940, tokenIndex940 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l938 + goto l940 } position++ - goto l937 - l938: - position, tokenIndex = position938, tokenIndex938 + goto l939 + l940: + position, tokenIndex = position940, tokenIndex940 } if !_rules[ruleOffsetOperator]() { - goto l935 + goto l937 } if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l935 + goto l937 } position++ - l939: + l941: { - position940, tokenIndex940 := position, tokenIndex + position942, tokenIndex942 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l940 + goto l942 } position++ - goto l939 - l940: - position, tokenIndex = position940, tokenIndex940 + goto l941 + l942: + position, tokenIndex = position942, tokenIndex942 } if buffer[position] != rune(')') { - goto l935 + goto l937 } position++ - goto l936 - l935: - position, tokenIndex = position935, tokenIndex935 + goto l938 + l937: + position, tokenIndex = position937, tokenIndex937 } - l936: + l938: goto l926 - l932: + l934: position, tokenIndex = position926, tokenIndex926 if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l941 + goto l943 } position++ - l942: + l944: { - position943, tokenIndex943 := position, tokenIndex + position945, tokenIndex945 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l943 + goto l945 } position++ - goto l942 - l943: - position, tokenIndex = position943, tokenIndex943 + goto l944 + l945: + position, tokenIndex = position945, tokenIndex945 } { - position944, tokenIndex944 := position, tokenIndex + position946, tokenIndex946 := position, tokenIndex if !_rules[ruleOffsetOperator]() { - goto l944 + goto l946 } if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l944 + goto l946 } position++ - l946: + l948: { - position947, tokenIndex947 := position, tokenIndex + position949, tokenIndex949 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l947 + goto l949 } position++ - goto l946 - l947: - position, tokenIndex = position947, tokenIndex947 + goto l948 + l949: + position, tokenIndex = position949, tokenIndex949 } if !_rules[ruleOffsetOperator]() { - goto l944 + goto l946 } if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l944 + goto l946 } position++ - l948: + l950: { - position949, tokenIndex949 := position, tokenIndex + position951, tokenIndex951 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l949 + goto l951 } position++ - goto l948 - l949: - position, tokenIndex = position949, tokenIndex949 + goto l950 + l951: + position, tokenIndex = position951, tokenIndex951 } - goto l945 - l944: - position, tokenIndex = position944, tokenIndex944 + goto l947 + l946: + position, tokenIndex = position946, tokenIndex946 } - l945: + l947: goto l926 - l941: + l943: position, tokenIndex = position926, tokenIndex926 if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l950 + goto l952 } position++ - l951: + l953: { - position952, tokenIndex952 := position, tokenIndex + position954, tokenIndex954 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l952 + goto l954 } position++ - goto l951 - l952: - position, tokenIndex = position952, tokenIndex952 + goto l953 + l954: + position, tokenIndex = position954, tokenIndex954 } { - position953, tokenIndex953 := position, tokenIndex + position955, tokenIndex955 := position, tokenIndex if !_rules[ruleOffsetOperator]() { - goto l953 + goto l955 } if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l953 + goto l955 } position++ - l955: + l957: { - position956, tokenIndex956 := position, tokenIndex + position958, tokenIndex958 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l956 + goto l958 } position++ - goto l955 - l956: - position, tokenIndex = position956, tokenIndex956 + goto l957 + l958: + position, tokenIndex = position958, tokenIndex958 } - goto l954 - l953: - position, tokenIndex = position953, tokenIndex953 + goto l956 + l955: + position, tokenIndex = position955, tokenIndex955 } - l954: + l956: goto l926 - l950: + l952: position, tokenIndex = position926, tokenIndex926 if buffer[position] != rune('(') { - goto l957 + goto l959 } position++ if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l957 + goto l959 } position++ - l958: + l960: { - position959, tokenIndex959 := position, tokenIndex + position961, tokenIndex961 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l959 + goto l961 } position++ - goto l958 - l959: - position, tokenIndex = position959, tokenIndex959 - } - { - position960, tokenIndex960 := position, tokenIndex - if !_rules[ruleWS]() { - goto l960 - } - goto l961 - l960: - position, tokenIndex = position960, tokenIndex960 - } - l961: - if !_rules[ruleOffsetOperator]() { - goto l957 + goto l960 + l961: + position, tokenIndex = position961, tokenIndex961 } { position962, tokenIndex962 := position, tokenIndex @@ -7666,30 +7663,21 @@ func (p *Asm) Init(options ...func(*Asm) error) error { position, tokenIndex = position962, tokenIndex962 } l963: - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l957 + if !_rules[ruleOffsetOperator]() { + goto l959 } - position++ - l964: { - position965, tokenIndex965 := position, tokenIndex - if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l965 + position964, tokenIndex964 := position, tokenIndex + if !_rules[ruleWS]() { + goto l964 } - position++ - goto l964 - l965: - position, tokenIndex = position965, tokenIndex965 - } - if buffer[position] != rune(')') { - goto l957 - } - position++ - if !_rules[ruleOffsetOperator]() { - goto l957 + goto l965 + l964: + position, tokenIndex = position964, tokenIndex964 } + l965: if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l957 + goto l959 } position++ l966: @@ -7703,11 +7691,15 @@ func (p *Asm) Init(options ...func(*Asm) error) error { l967: position, tokenIndex = position967, tokenIndex967 } + if buffer[position] != rune(')') { + goto l959 + } + position++ if !_rules[ruleOffsetOperator]() { - goto l957 + goto l959 } if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l957 + goto l959 } position++ l968: @@ -7721,40 +7713,45 @@ func (p *Asm) Init(options ...func(*Asm) error) error { l969: position, tokenIndex = position969, tokenIndex969 } + if !_rules[ruleOffsetOperator]() { + goto l959 + } + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l959 + } + position++ + l970: + { + position971, tokenIndex971 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { + goto l971 + } + position++ + goto l970 + l971: + position, tokenIndex = position971, tokenIndex971 + } goto l926 - l957: + l959: position, tokenIndex = position926, tokenIndex926 if buffer[position] != rune('(') { - goto l970 + goto l972 } position++ if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l970 + goto l972 } position++ - l971: + l973: { - position972, tokenIndex972 := position, tokenIndex + position974, tokenIndex974 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l972 + goto l974 } position++ - goto l971 - l972: - position, tokenIndex = position972, tokenIndex972 - } - { - position973, tokenIndex973 := position, tokenIndex - if !_rules[ruleWS]() { - goto l973 - } - goto l974 - l973: - position, tokenIndex = position973, tokenIndex973 - } - l974: - if !_rules[ruleOffsetOperator]() { - goto l970 + goto l973 + l974: + position, tokenIndex = position974, tokenIndex974 } { position975, tokenIndex975 := position, tokenIndex @@ -7766,87 +7763,87 @@ func (p *Asm) Init(options ...func(*Asm) error) error { position, tokenIndex = position975, tokenIndex975 } l976: + if !_rules[ruleOffsetOperator]() { + goto l972 + } + { + position977, tokenIndex977 := position, tokenIndex + if !_rules[ruleWS]() { + goto l977 + } + goto l978 + l977: + position, tokenIndex = position977, tokenIndex977 + } + l978: if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l970 + goto l972 } position++ - l977: + l979: { - position978, tokenIndex978 := position, tokenIndex + position980, tokenIndex980 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l978 + goto l980 } position++ - goto l977 - l978: - position, tokenIndex = position978, tokenIndex978 + goto l979 + l980: + position, tokenIndex = position980, tokenIndex980 } if buffer[position] != rune(')') { - goto l970 + goto l972 } position++ if !_rules[ruleOffsetOperator]() { - goto l970 + goto l972 } if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l970 + goto l972 } position++ - l979: + l981: { - position980, tokenIndex980 := position, tokenIndex + position982, tokenIndex982 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l980 + goto l982 } position++ - goto l979 - l980: - position, tokenIndex = position980, tokenIndex980 + goto l981 + l982: + position, tokenIndex = position982, tokenIndex982 } { - position981, tokenIndex981 := position, tokenIndex + position983, tokenIndex983 := position, tokenIndex if buffer[position] != rune('x') { - goto l981 + goto l983 } position++ - goto l970 - l981: - position, tokenIndex = position981, tokenIndex981 + goto l972 + l983: + position, tokenIndex = position983, tokenIndex983 } goto l926 - l970: + l972: position, tokenIndex = position926, tokenIndex926 if buffer[position] != rune('(') { - goto l982 + goto l984 } position++ if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l982 + goto l984 } position++ - l983: + l985: { - position984, tokenIndex984 := position, tokenIndex + position986, tokenIndex986 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l984 + goto l986 } position++ - goto l983 - l984: - position, tokenIndex = position984, tokenIndex984 - } - { - position985, tokenIndex985 := position, tokenIndex - if !_rules[ruleWS]() { - goto l985 - } - goto l986 - l985: - position, tokenIndex = position985, tokenIndex985 - } - l986: - if !_rules[ruleOffsetOperator]() { - goto l982 + goto l985 + l986: + position, tokenIndex = position986, tokenIndex986 } { position987, tokenIndex987 := position, tokenIndex @@ -7858,27 +7855,40 @@ func (p *Asm) Init(options ...func(*Asm) error) error { position, tokenIndex = position987, tokenIndex987 } l988: + if !_rules[ruleOffsetOperator]() { + goto l984 + } + { + position989, tokenIndex989 := position, tokenIndex + if !_rules[ruleWS]() { + goto l989 + } + goto l990 + l989: + position, tokenIndex = position989, tokenIndex989 + } + l990: if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l982 + goto l984 } position++ - l989: + l991: { - position990, tokenIndex990 := position, tokenIndex + position992, tokenIndex992 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l990 + goto l992 } position++ - goto l989 - l990: - position, tokenIndex = position990, tokenIndex990 + goto l991 + l992: + position, tokenIndex = position992, tokenIndex992 } if buffer[position] != rune(')') { - goto l982 + goto l984 } position++ goto l926 - l982: + l984: position, tokenIndex = position926, tokenIndex926 if buffer[position] != rune('(') { goto l895 @@ -7888,92 +7898,92 @@ func (p *Asm) Init(options ...func(*Asm) error) error { goto l895 } position++ - l991: + l993: { - position992, tokenIndex992 := position, tokenIndex + position994, tokenIndex994 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l992 + goto l994 } position++ - goto l991 - l992: - position, tokenIndex = position992, tokenIndex992 + goto l993 + l994: + position, tokenIndex = position994, tokenIndex994 } { - position993, tokenIndex993 := position, tokenIndex + position995, tokenIndex995 := position, tokenIndex if !_rules[ruleWS]() { - goto l993 + goto l995 } - goto l994 - l993: - position, tokenIndex = position993, tokenIndex993 + goto l996 + l995: + position, tokenIndex = position995, tokenIndex995 } - l994: + l996: if !_rules[ruleOffsetOperator]() { goto l895 } { - position995, tokenIndex995 := position, tokenIndex + position997, tokenIndex997 := position, tokenIndex if !_rules[ruleWS]() { - goto l995 + goto l997 } - goto l996 - l995: - position, tokenIndex = position995, tokenIndex995 + goto l998 + l997: + position, tokenIndex = position997, tokenIndex997 } - l996: + l998: if c := buffer[position]; c < rune('0') || c > rune('9') { goto l895 } position++ - l997: + l999: { - position998, tokenIndex998 := position, tokenIndex + position1000, tokenIndex1000 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l998 + goto l1000 } position++ - goto l997 - l998: - position, tokenIndex = position998, tokenIndex998 + goto l999 + l1000: + position, tokenIndex = position1000, tokenIndex1000 } { - position999, tokenIndex999 := position, tokenIndex + position1001, tokenIndex1001 := position, tokenIndex if !_rules[ruleWS]() { - goto l999 + goto l1001 } - goto l1000 - l999: - position, tokenIndex = position999, tokenIndex999 + goto l1002 + l1001: + position, tokenIndex = position1001, tokenIndex1001 } - l1000: + l1002: if !_rules[ruleOffsetOperator]() { goto l895 } { - position1001, tokenIndex1001 := position, tokenIndex + position1003, tokenIndex1003 := position, tokenIndex if !_rules[ruleWS]() { - goto l1001 + goto l1003 } - goto l1002 - l1001: - position, tokenIndex = position1001, tokenIndex1001 + goto l1004 + l1003: + position, tokenIndex = position1003, tokenIndex1003 } - l1002: + l1004: if c := buffer[position]; c < rune('0') || c > rune('9') { goto l895 } position++ - l1003: + l1005: { - position1004, tokenIndex1004 := position, tokenIndex + position1006, tokenIndex1006 := position, tokenIndex if c := buffer[position]; c < rune('0') || c > rune('9') { - goto l1004 + goto l1006 } position++ - goto l1003 - l1004: - position, tokenIndex = position1004, tokenIndex1004 + goto l1005 + l1006: + position, tokenIndex = position1006, tokenIndex1006 } if buffer[position] != rune(')') { goto l895 @@ -7982,25 +7992,25 @@ func (p *Asm) Init(options ...func(*Asm) error) error { } l926: { - position1005, tokenIndex1005 := position, tokenIndex + position1007, tokenIndex1007 := position, tokenIndex { - position1006, tokenIndex1006 := position, tokenIndex + position1008, tokenIndex1008 := position, tokenIndex if c := buffer[position]; c < rune('a') || c > rune('z') { - goto l1007 + goto l1009 } position++ - goto l1006 - l1007: - position, tokenIndex = position1006, tokenIndex1006 + goto l1008 + l1009: + position, tokenIndex = position1008, tokenIndex1008 if c := buffer[position]; c < rune('A') || c > rune('Z') { - goto l1005 + goto l1007 } position++ } - l1006: + l1008: goto l895 - l1005: - position, tokenIndex = position1005, tokenIndex1005 + l1007: + position, tokenIndex = position1007, tokenIndex1007 } } l901: @@ -8018,67 +8028,67 @@ func (p *Asm) Init(options ...func(*Asm) error) error { if memoized, ok := memoization[memoKey{58, position}]; ok { return memoizedResult(memoized) } - position1008, tokenIndex1008 := position, tokenIndex + position1010, tokenIndex1010 := position, tokenIndex { - position1009 := position + position1011 := position { - position1012, tokenIndex1012 := position, tokenIndex + position1014, tokenIndex1014 := position, tokenIndex if c := buffer[position]; c < rune('a') || c > rune('z') { - goto l1013 + goto l1015 } position++ - goto l1012 - l1013: - position, tokenIndex = position1012, tokenIndex1012 + goto l1014 + l1015: + position, tokenIndex = position1014, tokenIndex1014 if c := buffer[position]; c < rune('A') || c > rune('Z') { - goto l1014 + goto l1016 } position++ - goto l1012 - l1014: - position, tokenIndex = position1012, tokenIndex1012 + goto l1014 + l1016: + position, tokenIndex = position1014, tokenIndex1014 if buffer[position] != rune('@') { - goto l1008 + goto l1010 } position++ } + l1014: l1012: - l1010: { - position1011, tokenIndex1011 := position, tokenIndex + position1013, tokenIndex1013 := position, tokenIndex { - position1015, tokenIndex1015 := position, tokenIndex + position1017, tokenIndex1017 := position, tokenIndex if c := buffer[position]; c < rune('a') || c > rune('z') { - goto l1016 + goto l1018 } position++ - goto l1015 - l1016: - position, tokenIndex = position1015, tokenIndex1015 + goto l1017 + l1018: + position, tokenIndex = position1017, tokenIndex1017 if c := buffer[position]; c < rune('A') || c > rune('Z') { - goto l1017 + goto l1019 } position++ - goto l1015 - l1017: - position, tokenIndex = position1015, tokenIndex1015 + goto l1017 + l1019: + position, tokenIndex = position1017, tokenIndex1017 if buffer[position] != rune('@') { - goto l1011 + goto l1013 } position++ } - l1015: - goto l1010 - l1011: - position, tokenIndex = position1011, tokenIndex1011 + l1017: + goto l1012 + l1013: + position, tokenIndex = position1013, tokenIndex1013 } - add(ruleSection, position1009) + add(ruleSection, position1011) } - memoize(58, position1008, tokenIndex1008, true) + memoize(58, position1010, tokenIndex1010, true) return true - l1008: - memoize(58, position1008, tokenIndex1008, false) - position, tokenIndex = position1008, tokenIndex1008 + l1010: + memoize(58, position1010, tokenIndex1010, false) + position, tokenIndex = position1010, tokenIndex1010 return false }, /* 59 SegmentRegister <- <('%' ([c-g] / 's') ('s' ':'))> */ @@ -8086,43 +8096,43 @@ func (p *Asm) Init(options ...func(*Asm) error) error { if memoized, ok := memoization[memoKey{59, position}]; ok { return memoizedResult(memoized) } - position1018, tokenIndex1018 := position, tokenIndex + position1020, tokenIndex1020 := position, tokenIndex { - position1019 := position + position1021 := position if buffer[position] != rune('%') { - goto l1018 + goto l1020 } position++ { - position1020, tokenIndex1020 := position, tokenIndex + position1022, tokenIndex1022 := position, tokenIndex if c := buffer[position]; c < rune('c') || c > rune('g') { - goto l1021 + goto l1023 } position++ - goto l1020 - l1021: - position, tokenIndex = position1020, tokenIndex1020 + goto l1022 + l1023: + position, tokenIndex = position1022, tokenIndex1022 if buffer[position] != rune('s') { - goto l1018 + goto l1020 } position++ } - l1020: + l1022: if buffer[position] != rune('s') { - goto l1018 + goto l1020 } position++ if buffer[position] != rune(':') { - goto l1018 + goto l1020 } position++ - add(ruleSegmentRegister, position1019) + add(ruleSegmentRegister, position1021) } - memoize(59, position1018, tokenIndex1018, true) + memoize(59, position1020, tokenIndex1020, true) return true - l1018: - memoize(59, position1018, tokenIndex1018, false) - position, tokenIndex = position1018, tokenIndex1018 + l1020: + memoize(59, position1020, tokenIndex1020, false) + position, tokenIndex = position1020, tokenIndex1020 return false }, }