diff --git a/crypto/fipsmodule/cipher/e_aes.cc.inc b/crypto/fipsmodule/cipher/e_aes.cc.inc
index 346cd8b0a6..a9d8960621 100644
--- a/crypto/fipsmodule/cipher/e_aes.cc.inc
+++ b/crypto/fipsmodule/cipher/e_aes.cc.inc
@@ -241,35 +241,9 @@ static int aes_ofb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in,
   return 1;
 }
 
-#if defined(OPENSSL_32_BIT)
-#define EVP_AES_GCM_CTX_PADDING (4 + 8)
-#else
-#define EVP_AES_GCM_CTX_PADDING 8
-#endif
-
-static EVP_AES_GCM_CTX *aes_gcm_from_cipher_ctx(EVP_CIPHER_CTX *ctx) {
-  static_assert(
-      alignof(EVP_AES_GCM_CTX) <= 16,
-      "EVP_AES_GCM_CTX needs more alignment than this function provides");
-
-  // |malloc| guarantees up to 4-byte alignment on 32-bit and 8-byte alignment
-  // on 64-bit systems, so we need to adjust to reach 16-byte alignment.
-  assert(ctx->cipher->ctx_size ==
-         sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING);
-
-  char *ptr = reinterpret_cast<char *>(ctx->cipher_data);
-#if defined(OPENSSL_32_BIT)
-  assert((uintptr_t)ptr % 4 == 0);
-  ptr += (uintptr_t)ptr & 4;
-#endif
-  assert((uintptr_t)ptr % 8 == 0);
-  ptr += (uintptr_t)ptr & 8;
-  return (EVP_AES_GCM_CTX *)ptr;
-}
-
 static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key,
                             const uint8_t *iv, int enc) {
-  EVP_AES_GCM_CTX *gctx = aes_gcm_from_cipher_ctx(ctx);
+  EVP_AES_GCM_CTX *gctx = reinterpret_cast<EVP_AES_GCM_CTX *>(ctx->cipher_data);
   if (!iv && !key) {
     return 1;
   }
@@ -312,7 +286,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key,
 }
 
 static void aes_gcm_cleanup(EVP_CIPHER_CTX *c) {
-  EVP_AES_GCM_CTX *gctx = aes_gcm_from_cipher_ctx(c);
+  EVP_AES_GCM_CTX *gctx = reinterpret_cast<EVP_AES_GCM_CTX *>(c->cipher_data);
   OPENSSL_cleanse(&gctx->key, sizeof(gctx->key));
   OPENSSL_cleanse(&gctx->gcm, sizeof(gctx->gcm));
   if (gctx->iv != c->iv) {
@@ -321,7 +295,7 @@ static void aes_gcm_cleanup(EVP_CIPHER_CTX *c) {
 }
 
 static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) {
-  EVP_AES_GCM_CTX *gctx = aes_gcm_from_cipher_ctx(c);
+  EVP_AES_GCM_CTX *gctx = reinterpret_cast<EVP_AES_GCM_CTX *>(c->cipher_data);
   switch (type) {
     case EVP_CTRL_INIT:
       gctx->key_set = 0;
@@ -421,10 +395,8 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) {
 
     case EVP_CTRL_COPY: {
       EVP_CIPHER_CTX *out = reinterpret_cast<EVP_CIPHER_CTX *>(ptr);
-      EVP_AES_GCM_CTX *gctx_out = aes_gcm_from_cipher_ctx(out);
-      // |EVP_CIPHER_CTX_copy| copies this generically, but we must redo it in
-      // case |out->cipher_data| and |in->cipher_data| are differently aligned.
-      OPENSSL_memcpy(gctx_out, gctx, sizeof(EVP_AES_GCM_CTX));
+      EVP_AES_GCM_CTX *gctx_out =
+          reinterpret_cast<EVP_AES_GCM_CTX *>(out->cipher_data);
       if (gctx->iv == c->iv) {
         gctx_out->iv = out->iv;
       } else {
@@ -444,7 +416,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) {
 
 static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in,
                           size_t len) {
-  EVP_AES_GCM_CTX *gctx = aes_gcm_from_cipher_ctx(ctx);
+  EVP_AES_GCM_CTX *gctx = reinterpret_cast<EVP_AES_GCM_CTX *>(ctx->cipher_data);
 
   // If not set up, return error
   if (!gctx->key_set) {
@@ -552,7 +524,7 @@ DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_aes_128_gcm) {
   out->block_size = 1;
   out->key_len = 16;
   out->iv_len = AES_GCM_NONCE_LENGTH;
-  out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
+  out->ctx_size = sizeof(EVP_AES_GCM_CTX);
   out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
                EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
                EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_AEAD_CIPHER;
@@ -620,7 +592,7 @@ DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_aes_192_gcm) {
   out->block_size = 1;
   out->key_len = 24;
   out->iv_len = AES_GCM_NONCE_LENGTH;
-  out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
+  out->ctx_size = sizeof(EVP_AES_GCM_CTX);
   out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
                EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
                EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_AEAD_CIPHER;
@@ -688,7 +660,7 @@ DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_aes_256_gcm) {
   out->block_size = 1;
   out->key_len = 32;
   out->iv_len = AES_GCM_NONCE_LENGTH;
-  out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
+  out->ctx_size = sizeof(EVP_AES_GCM_CTX);
   out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
                EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
                EVP_CIPH_CTRL_INIT | EVP_CIPH_FLAG_AEAD_CIPHER;
diff --git a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl
index 045c6cbaf7..793c94f9cb 100644
--- a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86.pl
@@ -100,7 +100,7 @@ sub process_rows {
 	&mov("eax", $rows);
 
 &set_label("loop_row_$call_counter");
-	&movdqa("xmm4", &QWP(0, $Htable));
+	&movdqu("xmm4", &QWP(0, $Htable));
 	&lea($Htable, &DWP(16, $Htable));
 
 	# Right-shift xmm2 and xmm3 by 8 bytes.
diff --git a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
index e7590b49cf..20a2abbee4 100644
--- a/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-ssse3-x86_64.pl
@@ -156,7 +156,7 @@ sub process_rows {
 	return <<____;
 	movq \$$rows, %rax
 .Loop_row_$call_counter:
-	movdqa ($Htable), %xmm4
+	movdqu ($Htable), %xmm4
 	leaq 16($Htable), $Htable
 
 	# Right-shift %xmm2 and %xmm3 by 8 bytes.
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index 1878121047..a77aaea0b9 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -126,10 +126,6 @@ typedef void (*ghash_func)(uint8_t Xi[16], const u128 Htable[16],
                            const uint8_t *inp, size_t len);
 
 typedef struct gcm128_key_st {
-  // |gcm_*_ssse3| require a 16-byte-aligned |Htable| when hashing data, but not
-  // initialization. |GCM128_KEY| is not itself aligned to simplify embedding in
-  // |EVP_AEAD_CTX|, but |Htable|'s offset must be a multiple of 16.
-  // TODO(crbug.com/boringssl/604): Revisit this.
   u128 Htable[16];
   gmult_func gmult;
   ghash_func ghash;
@@ -223,8 +219,6 @@ void gcm_gmult_clmul(uint8_t Xi[16], const u128 Htable[16]);
 void gcm_ghash_clmul(uint8_t Xi[16], const u128 Htable[16], const uint8_t *inp,
                      size_t len);
 
-// |gcm_gmult_ssse3| and |gcm_ghash_ssse3| require |Htable| to be
-// 16-byte-aligned, but |gcm_init_ssse3| does not.
 void gcm_init_ssse3(u128 Htable[16], const uint64_t Xi[2]);
 void gcm_gmult_ssse3(uint8_t Xi[16], const u128 Htable[16]);
 void gcm_ghash_ssse3(uint8_t Xi[16], const u128 Htable[16], const uint8_t *in,
                      size_t len);
@@ -382,9 +376,7 @@ size_t CRYPTO_cts128_encrypt_block(const uint8_t *in, uint8_t *out, size_t len,
 
 struct polyval_ctx {
   uint8_t S[16];
-  // |gcm_*_ssse3| require |Htable| to be 16-byte-aligned.
-  // TODO(crbug.com/boringssl/604): Revisit this.
-  alignas(16) u128 Htable[16];
+  u128 Htable[16];
   gmult_func gmult;
   ghash_func ghash;
 };
diff --git a/gen/bcm/ghash-ssse3-x86-apple.S b/gen/bcm/ghash-ssse3-x86-apple.S
index 24b1f2f309..96cb86f067 100644
--- a/gen/bcm/ghash-ssse3-x86-apple.S
+++ b/gen/bcm/ghash-ssse3-x86-apple.S
@@ -31,7 +31,7 @@ L000pic_point:
 	pxor	%xmm3,%xmm3
 	movl	$5,%eax
 L001loop_row_1:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -62,7 +62,7 @@ L001loop_row_1:
 	pxor	%xmm3,%xmm3
 	movl	$5,%eax
 L002loop_row_2:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -93,7 +93,7 @@ L002loop_row_2:
 	pxor	%xmm3,%xmm3
 	movl	$6,%eax
 L003loop_row_3:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -169,7 +169,7 @@ L005loop_ghash:
 	pxor	%xmm2,%xmm2
 	movl	$5,%eax
 L006loop_row_4:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -200,7 +200,7 @@ L006loop_row_4:
 	pxor	%xmm3,%xmm3
 	movl	$5,%eax
 L007loop_row_5:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -231,7 +231,7 @@ L007loop_row_5:
 	pxor	%xmm3,%xmm3
 	movl	$6,%eax
 L008loop_row_6:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
diff --git a/gen/bcm/ghash-ssse3-x86-linux.S b/gen/bcm/ghash-ssse3-x86-linux.S
index 445db3b4f7..7fe65e76bf 100644
--- a/gen/bcm/ghash-ssse3-x86-linux.S
+++ b/gen/bcm/ghash-ssse3-x86-linux.S
@@ -32,7 +32,7 @@ gcm_gmult_ssse3:
 	pxor	%xmm3,%xmm3
 	movl	$5,%eax
 .L001loop_row_1:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -63,7 +63,7 @@ gcm_gmult_ssse3:
 	pxor	%xmm3,%xmm3
 	movl	$5,%eax
 .L002loop_row_2:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -94,7 +94,7 @@ gcm_gmult_ssse3:
 	pxor	%xmm3,%xmm3
 	movl	$6,%eax
 .L003loop_row_3:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -172,7 +172,7 @@ gcm_ghash_ssse3:
 	pxor	%xmm2,%xmm2
 	movl	$5,%eax
 .L006loop_row_4:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -203,7 +203,7 @@ gcm_ghash_ssse3:
 	pxor	%xmm3,%xmm3
 	movl	$5,%eax
 .L007loop_row_5:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
@@ -234,7 +234,7 @@ gcm_ghash_ssse3:
 	pxor	%xmm3,%xmm3
 	movl	$6,%eax
 .L008loop_row_6:
-	movdqa	(%esi),%xmm4
+	movdqu	(%esi),%xmm4
 	leal	16(%esi),%esi
 	movdqa	%xmm2,%xmm6
 .byte	102,15,58,15,243,1
diff --git a/gen/bcm/ghash-ssse3-x86-win.asm b/gen/bcm/ghash-ssse3-x86-win.asm
index 52108aacc1..201ef5751d 100644
--- a/gen/bcm/ghash-ssse3-x86-win.asm
+++ b/gen/bcm/ghash-ssse3-x86-win.asm
@@ -38,7 +38,7 @@ db	102,15,56,0,199
 	pxor	xmm3,xmm3
 	mov	eax,5
 L$001loop_row_1:
-	movdqa	xmm4,[esi]
+	movdqu	xmm4,[esi]
 	lea	esi,[16+esi]
 	movdqa	xmm6,xmm2
 db	102,15,58,15,243,1
@@ -69,7 +69,7 @@ db	102,15,56,0,233
 	pxor	xmm3,xmm3
 	mov	eax,5
 L$002loop_row_2:
-	movdqa	xmm4,[esi]
+	movdqu	xmm4,[esi]
 	lea	esi,[16+esi]
 	movdqa	xmm6,xmm2
 db	102,15,58,15,243,1
@@ -100,7 +100,7 @@ db	102,15,56,0,233
 	pxor	xmm3,xmm3
 	mov	eax,6
 L$003loop_row_3:
-	movdqa	xmm4,[esi]
+	movdqu	xmm4,[esi]
 	lea	esi,[16+esi]
 	movdqa	xmm6,xmm2
 db	102,15,58,15,243,1
@@ -175,7 +175,7 @@ db	102,15,56,0,207
 	pxor	xmm2,xmm2
 	mov	eax,5
 L$006loop_row_4:
-	movdqa	xmm4,[esi]
+	movdqu	xmm4,[esi]
 	lea	esi,[16+esi]
 	movdqa	xmm6,xmm2
 db	102,15,58,15,243,1
@@ -206,7 +206,7 @@ db	102,15,56,0,233
 	pxor	xmm3,xmm3
 	mov	eax,5
 L$007loop_row_5:
-	movdqa	xmm4,[esi]
+	movdqu	xmm4,[esi]
 	lea	esi,[16+esi]
 	movdqa	xmm6,xmm2
 db	102,15,58,15,243,1
@@ -237,7 +237,7 @@ db	102,15,56,0,233
 	pxor	xmm3,xmm3
 	mov	eax,6
 L$008loop_row_6:
-	movdqa	xmm4,[esi]
+	movdqu	xmm4,[esi]
 	lea	esi,[16+esi]
 	movdqa	xmm6,xmm2
 db	102,15,58,15,243,1
diff --git a/gen/bcm/ghash-ssse3-x86_64-apple.S b/gen/bcm/ghash-ssse3-x86_64-apple.S
index bcbf824f4a..651cca321d 100644
--- a/gen/bcm/ghash-ssse3-x86_64-apple.S
+++ b/gen/bcm/ghash-ssse3-x86_64-apple.S
@@ -38,7 +38,7 @@ _CET_ENDBR
 	pxor	%xmm3,%xmm3
 	movq	$5,%rax
 L$oop_row_1:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -86,7 +86,7 @@ L$oop_row_1:
 	pxor	%xmm3,%xmm3
 	movq	$5,%rax
 L$oop_row_2:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -134,7 +134,7 @@ L$oop_row_2:
 	pxor	%xmm3,%xmm3
 	movq	$6,%rax
 L$oop_row_3:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -241,7 +241,7 @@ L$oop_ghash:
 
 	movq	$5,%rax
 L$oop_row_4:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -289,7 +289,7 @@ L$oop_row_4:
 	pxor	%xmm3,%xmm3
 	movq	$5,%rax
 L$oop_row_5:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -337,7 +337,7 @@ L$oop_row_5:
 	pxor	%xmm3,%xmm3
 	movq	$6,%rax
 L$oop_row_6:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
diff --git a/gen/bcm/ghash-ssse3-x86_64-linux.S b/gen/bcm/ghash-ssse3-x86_64-linux.S
index 2acb448953..84ac20adf5 100644
--- a/gen/bcm/ghash-ssse3-x86_64-linux.S
+++ b/gen/bcm/ghash-ssse3-x86_64-linux.S
@@ -38,7 +38,7 @@ _CET_ENDBR
 	pxor	%xmm3,%xmm3
 	movq	$5,%rax
 .Loop_row_1:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -86,7 +86,7 @@ _CET_ENDBR
 	pxor	%xmm3,%xmm3
 	movq	$5,%rax
 .Loop_row_2:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -134,7 +134,7 @@ _CET_ENDBR
 	pxor	%xmm3,%xmm3
 	movq	$6,%rax
 .Loop_row_3:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -241,7 +241,7 @@ _CET_ENDBR
 
 	movq	$5,%rax
 .Loop_row_4:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -289,7 +289,7 @@ _CET_ENDBR
 	pxor	%xmm3,%xmm3
 	movq	$5,%rax
 .Loop_row_5:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
@@ -337,7 +337,7 @@ _CET_ENDBR
 	pxor	%xmm3,%xmm3
 	movq	$6,%rax
 .Loop_row_6:
-	movdqa	(%rsi),%xmm4
+	movdqu	(%rsi),%xmm4
 	leaq	16(%rsi),%rsi
 
 
diff --git a/gen/bcm/ghash-ssse3-x86_64-win.asm b/gen/bcm/ghash-ssse3-x86_64-win.asm
index e0de962438..c00e039176 100644
--- a/gen/bcm/ghash-ssse3-x86_64-win.asm
+++ b/gen/bcm/ghash-ssse3-x86_64-win.asm
@@ -52,7 +52,7 @@ DB	102,65,15,56,0,194
 	pxor	xmm3,xmm3
 	mov	rax,5
 $L$oop_row_1:
-	movdqa	xmm4,XMMWORD[rdx]
+	movdqu	xmm4,XMMWORD[rdx]
 	lea	rdx,[16+rdx]
 
 
@@ -100,7 +100,7 @@ DB	102,15,56,0,233
 	pxor	xmm3,xmm3
 	mov	rax,5
 $L$oop_row_2:
-	movdqa	xmm4,XMMWORD[rdx]
+	movdqu	xmm4,XMMWORD[rdx]
 	lea	rdx,[16+rdx]
 
 
@@ -148,7 +148,7 @@ DB	102,15,56,0,233
 	pxor	xmm3,xmm3
 	mov	rax,6
 $L$oop_row_3:
-	movdqa	xmm4,XMMWORD[rdx]
+	movdqu	xmm4,XMMWORD[rdx]
 	lea	rdx,[16+rdx]
 
 
@@ -266,7 +266,7 @@ DB	102,65,15,56,0,202
 
 	mov	rax,5
 $L$oop_row_4:
-	movdqa	xmm4,XMMWORD[rdx]
+	movdqu	xmm4,XMMWORD[rdx]
 	lea	rdx,[16+rdx]
 
 
@@ -314,7 +314,7 @@ DB	102,15,56,0,233
 	pxor	xmm3,xmm3
 	mov	rax,5
 $L$oop_row_5:
-	movdqa	xmm4,XMMWORD[rdx]
+	movdqu	xmm4,XMMWORD[rdx]
 	lea	rdx,[16+rdx]
 
 
@@ -362,7 +362,7 @@ DB	102,15,56,0,233
 	pxor	xmm3,xmm3
 	mov	rax,6
 $L$oop_row_6:
-	movdqa	xmm4,XMMWORD[rdx]
+	movdqu	xmm4,XMMWORD[rdx]
 	lea	rdx,[16+rdx]
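
Annotation (not part of the patch): the assembly change is mechanical, but the reasoning lives in the deleted comments. movdqa requires a 16-byte-aligned memory operand and raises #GP on an unaligned address, while movdqu accepts any address and, on current x86 cores, is just as fast when the data happens to be aligned. Switching the |Htable| loads to movdqu is what allows deleting the alignas(16) on |polyval_ctx|, the alignment comments in internal.h, and the |aes_gcm_from_cipher_ctx| shim that rounded a |malloc|-aligned |cipher_data| pointer up to 16 bytes. The sketch below is a minimal, hypothetical illustration of the two load forms using SSE2 intrinsics (_mm_load_si128 compiles to movdqa, _mm_loadu_si128 to movdqu); the helper names are invented for the example and appear nowhere in the patch:

    #include <emmintrin.h>  // SSE2 intrinsics
    #include <stdint.h>

    // Unaligned load, as the patched gcm_*_ssse3 code now does: |table| may
    // live at whatever alignment malloc happened to return.
    static __m128i xor_row(__m128i acc, const uint8_t *table) {
      __m128i row = _mm_loadu_si128((const __m128i *)table);  // movdqu
      return _mm_xor_si128(acc, row);
    }

    // The pre-patch form: faults (#GP) unless |table| is 16-byte aligned,
    // which is why EVP_AES_GCM_CTX previously over-allocated its context and
    // fixed up the pointer before use.
    static __m128i xor_row_aligned(__m128i acc, const uint8_t *table) {
      __m128i row = _mm_load_si128((const __m128i *)table);  // movdqa
      return _mm_xor_si128(acc, row);
    }

With the unaligned form, |EVP_AES_GCM_CTX| can be used directly at |ctx->cipher_data|, so ctx_size drops back to sizeof(EVP_AES_GCM_CTX) and the EVP_CTRL_COPY handler no longer needs to re-copy the context to compensate for differently aligned allocations.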