diff --git a/aes/XTS_AES_128_dec_expanded_key_vaes.asm b/aes/XTS_AES_128_dec_expanded_key_vaes.asm index 7904ec5b..be5b3d15 100644 --- a/aes/XTS_AES_128_dec_expanded_key_vaes.asm +++ b/aes/XTS_AES_128_dec_expanded_key_vaes.asm @@ -973,15 +973,14 @@ XTS_AES_128_dec_expanded_key_vaes: %endif cmp N_val, 128 - jl _less_than_128_bytes + jb _less_than_128_bytes vpbroadcastq zpoly, ghash_poly_8b cmp N_val, 256 jge _start_by16 - cmp N_val, 128 - jge _start_by8 + jmp _start_by8 _do_n_blocks: cmp N_val, 0 @@ -1009,7 +1008,7 @@ _do_n_blocks: jge _remaining_num_blocks_is_1 ;; _remaining_num_blocks_is_0: - vmovdqu xmm1, [ptr_plaintext - 16] ; Re-due last block with next tweak + vmovdqu xmm1, xmm5 ; xmm5 contains last full block to decrypt with next tweak decrypt_initial xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, na, na, na, na, na, na, xmm0, 1, 1 vmovdqu [ptr_ciphertext - 16], xmm1 vmovdqa xmm8, xmm1 @@ -1219,6 +1218,7 @@ _main_loop_run_16: vmovdqu8 zmm2, [ptr_plaintext+16*4] vmovdqu8 zmm3, [ptr_plaintext+16*8] vmovdqu8 zmm4, [ptr_plaintext+16*12] + vmovdqu8 xmm5, [ptr_plaintext+16*15] ; Save last full block in case this is the last iteration add ptr_plaintext, 256 decrypt_by_16_zmm zmm1, zmm2, zmm3, zmm4, zmm9, zmm10, zmm11, zmm12, zmm0, 0 @@ -1262,6 +1262,7 @@ _start_by8: _main_loop_run_8: vmovdqu8 zmm1, [ptr_plaintext+16*0] vmovdqu8 zmm2, [ptr_plaintext+16*4] + vmovdqu8 xmm5, [ptr_plaintext+16*7] ; Save last full block in case this is the last iteration add ptr_plaintext, 128 decrypt_by_eight_zmm zmm1, zmm2, zmm9, zmm10, zmm0, 0 diff --git a/aes/XTS_AES_128_dec_vaes.asm b/aes/XTS_AES_128_dec_vaes.asm index 72d1d955..bfe3d213 100644 --- a/aes/XTS_AES_128_dec_vaes.asm +++ b/aes/XTS_AES_128_dec_vaes.asm @@ -1006,15 +1006,14 @@ XTS_AES_128_dec_vaes: %endif cmp N_val, 128 - jl _less_than_128_bytes + jb _less_than_128_bytes vpbroadcastq zpoly, ghash_poly_8b cmp N_val, 256 jge _start_by16 - cmp N_val, 128 - jge _start_by8 + jmp _start_by8 _do_n_blocks: cmp N_val, 0 @@ 
-1042,7 +1041,7 @@ _do_n_blocks: jge _remaining_num_blocks_is_1 ;; _remaining_num_blocks_is_0: - vmovdqu xmm1, [ptr_plaintext - 16] ; Re-due last block with next tweak + vmovdqu xmm1, xmm5 ; xmm5 contains last full block to decrypt with next tweak decrypt_initial xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, na, na, na, na, na, na, xmm0, 1, 1 vmovdqu [ptr_ciphertext - 16], xmm1 vmovdqa xmm8, xmm1 @@ -1252,6 +1251,7 @@ _main_loop_run_16: vmovdqu8 zmm2, [ptr_plaintext+16*4] vmovdqu8 zmm3, [ptr_plaintext+16*8] vmovdqu8 zmm4, [ptr_plaintext+16*12] + vmovdqu8 xmm5, [ptr_plaintext+16*15] ; Save last full block in case this is the last iteration add ptr_plaintext, 256 decrypt_by_16_zmm zmm1, zmm2, zmm3, zmm4, zmm9, zmm10, zmm11, zmm12, zmm0, 0 @@ -1295,6 +1295,7 @@ _start_by8: _main_loop_run_8: vmovdqu8 zmm1, [ptr_plaintext+16*0] vmovdqu8 zmm2, [ptr_plaintext+16*4] + vmovdqu8 xmm5, [ptr_plaintext+16*7] ; Save last full block in case this is the last iteration add ptr_plaintext, 128 decrypt_by_eight_zmm zmm1, zmm2, zmm9, zmm10, zmm0, 0 diff --git a/aes/XTS_AES_256_dec_expanded_key_vaes.asm b/aes/XTS_AES_256_dec_expanded_key_vaes.asm index be76a75f..62374aed 100644 --- a/aes/XTS_AES_256_dec_expanded_key_vaes.asm +++ b/aes/XTS_AES_256_dec_expanded_key_vaes.asm @@ -1129,15 +1129,14 @@ XTS_AES_256_dec_expanded_key_vaes: %endif cmp N_val, 128 - jl _less_than_128_bytes + jb _less_than_128_bytes vpbroadcastq zpoly, ghash_poly_8b cmp N_val, 256 jge _start_by16 - cmp N_val, 128 - jge _start_by8 + jmp _start_by8 _do_n_blocks: cmp N_val, 0 @@ -1165,7 +1164,7 @@ _do_n_blocks: jge _remaining_num_blocks_is_1 ;; _remaining_num_blocks_is_0: - vmovdqu xmm1, [ptr_plaintext - 16] ; Re-due last block with next tweak + vmovdqu xmm1, xmm5 ; xmm5 contains last full block to decrypt with next tweak decrypt_initial xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, na, na, na, na, na, na, xmm0, 1, 1 vmovdqu [ptr_ciphertext - 16], xmm1 vmovdqa xmm8, xmm1 @@ -1375,6 +1374,7 @@ 
_main_loop_run_16: vmovdqu8 zmm2, [ptr_plaintext+16*4] vmovdqu8 zmm3, [ptr_plaintext+16*8] vmovdqu8 zmm4, [ptr_plaintext+16*12] + vmovdqu8 xmm5, [ptr_plaintext+16*15] ; Save last full block in case this is the last iteration add ptr_plaintext, 256 decrypt_by_16_zmm zmm1, zmm2, zmm3, zmm4, zmm9, zmm10, zmm11, zmm12, zmm0, 0 @@ -1418,6 +1418,7 @@ _start_by8: _main_loop_run_8: vmovdqu8 zmm1, [ptr_plaintext+16*0] vmovdqu8 zmm2, [ptr_plaintext+16*4] + vmovdqu8 xmm5, [ptr_plaintext+16*7] ; Save last full block in case this is the last iteration add ptr_plaintext, 128 decrypt_by_eight_zmm zmm1, zmm2, zmm9, zmm10, zmm0, 0 diff --git a/aes/XTS_AES_256_dec_vaes.asm b/aes/XTS_AES_256_dec_vaes.asm index 7b06f628..79be8de0 100644 --- a/aes/XTS_AES_256_dec_vaes.asm +++ b/aes/XTS_AES_256_dec_vaes.asm @@ -1196,15 +1196,14 @@ XTS_AES_256_dec_vaes: %endif cmp N_val, 128 - jl _less_than_128_bytes + jb _less_than_128_bytes vpbroadcastq zpoly, ghash_poly_8b cmp N_val, 256 jge _start_by16 - cmp N_val, 128 - jge _start_by8 + jmp _start_by8 _do_n_blocks: cmp N_val, 0 @@ -1232,7 +1231,7 @@ _do_n_blocks: jge _remaining_num_blocks_is_1 ;; _remaining_num_blocks_is_0: - vmovdqu xmm1, [ptr_plaintext - 16] ; Re-due last block with next tweak + vmovdqu xmm1, xmm5 ; xmm5 contains last full block to decrypt with next tweak decrypt_initial xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, na, na, na, na, na, na, xmm0, 1, 1 vmovdqu [ptr_ciphertext - 16], xmm1 vmovdqa xmm8, xmm1 @@ -1442,6 +1441,7 @@ _main_loop_run_16: vmovdqu8 zmm2, [ptr_plaintext+16*4] vmovdqu8 zmm3, [ptr_plaintext+16*8] vmovdqu8 zmm4, [ptr_plaintext+16*12] + vmovdqu8 xmm5, [ptr_plaintext+16*15] ; Save last full block in case this is the last iteration add ptr_plaintext, 256 decrypt_by_16_zmm zmm1, zmm2, zmm3, zmm4, zmm9, zmm10, zmm11, zmm12, zmm0, 0 @@ -1485,6 +1485,7 @@ _start_by8: _main_loop_run_8: vmovdqu8 zmm1, [ptr_plaintext+16*0] vmovdqu8 zmm2, [ptr_plaintext+16*4] + vmovdqu8 xmm5, [ptr_plaintext+16*7] ; Save last full 
block in case this is the last iteration add ptr_plaintext, 128 decrypt_by_eight_zmm zmm1, zmm2, zmm9, zmm10, zmm0, 0