Skip to content

Commit

Permalink
Avoid out of range pc-relative fixup value (aws#1454)
Browse files Browse the repository at this point in the history
### Description of changes: 
* Addresses the assembler failure [seen
here](https://github.com/aws/aws-lc-rs/actions/runs/8019806331/job/21908336880?pr=340#step:8:480)
when building for `arm-linux-androideabi`.
```
  /tmp/sha256-armv4-f3213e.s:1455:2: error: out of range pc-relative fixup value
   adr r14,K256
   ^
```
* Fix extracted from this [recent upstream
commit](google/boringssl@12316ab).
* Extended the `aws-lc-rs` CI tests to provide better coverage of older
Arm (pre-v7) CPUs.


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license and the ISC license.
  • Loading branch information
justsmth authored Feb 29, 2024
1 parent 67cf4cc commit 7600809
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 8 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/aws-lc-rs.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: aws-lc-rs sanity tests
name: aws-lc-rs tests
on:
push:
branches: [ '*' ]
Expand All @@ -13,7 +13,6 @@ jobs:
standard:
runs-on: ubuntu-latest
steps:

- uses: actions/checkout@v3
with:
repository: awslabs/aws-lc-rs
Expand Down Expand Up @@ -67,3 +66,8 @@ jobs:
working-directory: ./aws-lc-rs/aws-lc-rs
run: |
cargo test
- name: Install cross
run: cargo install cross --git https://github.com/cross-rs/cross
- name: Cross-compile arm-linux-androideabi
working-directory: ./aws-lc-rs/aws-lc-rs
run: cross test --release --features bindgen,unstable --target arm-linux-androideabi
40 changes: 38 additions & 2 deletions crypto/fipsmodule/sha/asm/sha256-armv4.pl
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,14 @@ ()
.arch armv7-a
.fpu neon
.LK256_shortcut_neon:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word K256-(.LK256_add_neon+4)
#else
.word K256-(.LK256_add_neon+8)
#endif
.global sha256_block_data_order_neon
.type sha256_block_data_order_neon,%function
.align 5
Expand All @@ -491,7 +499,21 @@ ()
stmdb sp!,{r4-r12,lr}
sub $H,sp,#16*4+16
adr $Ktbl,K256
@ K256 is just at the boundary of being easily referenced by an ADR from
@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
@ not fit. By moving code around, we could make it fit, but this is too
@ fragile. For simplicity, just load the offset from
@ .LK256_shortcut_neon.
@
@ TODO(davidben): adrl would avoid a load, but clang's assembler does not
@ support it. We might be able to emulate it with a macro, but the macro
@ from Android's documentation did not work when I tried it.
@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
ldr $Ktbl,.LK256_shortcut_neon
.LK256_add_neon:
add $Ktbl,pc,$Ktbl
bic $H,$H,#15 @ align for 128-bit stores
mov $t2,sp
mov sp,$H @ alloca
Expand Down Expand Up @@ -617,12 +639,26 @@ ()
# define INST(a,b,c,d) .byte a,b,c,d
# endif
.LK256_shortcut_armv8:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word K256-(.LK256_add_armv8+4)
#else
.word K256-(.LK256_add_armv8+8)
#endif
.type sha256_block_data_order_armv8,%function
.align 5
sha256_block_data_order_armv8:
.LARMv8:
@ K256 is too far to reference from one ADR command in Thumb mode. In
@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
@ boundary. For simplicity, just load the offset from .LK256_shortcut_armv8.
ldr $Ktbl,.LK256_shortcut_armv8
.LK256_add_armv8:
add $Ktbl,pc,$Ktbl
vld1.32 {$ABCD,$EFGH},[$ctx]
sub $Ktbl,$Ktbl,#256+32
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
b .Loop_v8
Expand Down
40 changes: 38 additions & 2 deletions generated-src/ios-arm/crypto/fipsmodule/sha256-armv4.S
Original file line number Diff line number Diff line change
Expand Up @@ -1889,6 +1889,14 @@ Lrounds_16_xx:



LK256_shortcut_neon:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word K256-(LK256_add_neon+4)
#else
.word K256-(LK256_add_neon+8)
#endif

.globl _sha256_block_data_order_neon
.private_extern _sha256_block_data_order_neon
#ifdef __thumb2__
Expand All @@ -1901,7 +1909,21 @@ LNEON:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

sub r11,sp,#16*4+16
adr r14,K256

@ K256 is just at the boundary of being easily referenced by an ADR from
@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
@ not fit. By moving code around, we could make it fit, but this is too
@ fragile. For simplicity, just load the offset from
@ LK256_shortcut_neon.
@
@ TODO(davidben): adrl would avoid a load, but clang's assembler does not
@ support it. We might be able to emulate it with a macro, but the macro
@ from Android's documentation did not work when I tried it.
@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
ldr r14,LK256_shortcut_neon
LK256_add_neon:
add r14,pc,r14

bic r11,r11,#15 @ align for 128-bit stores
mov r12,sp
mov sp,r11 @ alloca
Expand Down Expand Up @@ -2683,14 +2705,28 @@ L_00_48:
# define INST(a,b,c,d) .byte a,b,c,d
# endif

LK256_shortcut_armv8:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word K256-(LK256_add_armv8+4)
#else
.word K256-(LK256_add_armv8+8)
#endif

#ifdef __thumb2__
.thumb_func sha256_block_data_order_armv8
#endif
.align 5
sha256_block_data_order_armv8:
LARMv8:
@ K256 is too far to reference from one ADR command in Thumb mode. In
@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
@ boundary. For simplicity, just load the offset from LK256_shortcut_armv8.
ldr r3,LK256_shortcut_armv8
LK256_add_armv8:
add r3,pc,r3

vld1.32 {q0,q1},[r0]
sub r3,r3,#256+32
add r2,r1,r2,lsl#6 @ len to point at the end of inp
b Loop_v8

Expand Down
40 changes: 38 additions & 2 deletions generated-src/linux-arm/crypto/fipsmodule/sha256-armv4.S
Original file line number Diff line number Diff line change
Expand Up @@ -1887,6 +1887,14 @@ sha256_block_data_order:
.arch armv7-a
.fpu neon

.LK256_shortcut_neon:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word K256-(.LK256_add_neon+4)
#else
.word K256-(.LK256_add_neon+8)
#endif

.globl sha256_block_data_order_neon
.hidden sha256_block_data_order_neon
.type sha256_block_data_order_neon,%function
Expand All @@ -1897,7 +1905,21 @@ sha256_block_data_order_neon:
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

sub r11,sp,#16*4+16
adr r14,K256

@ K256 is just at the boundary of being easily referenced by an ADR from
@ this function. In Arm mode, when building with __ARM_ARCH=6, it does
@ not fit. By moving code around, we could make it fit, but this is too
@ fragile. For simplicity, just load the offset from
@ .LK256_shortcut_neon.
@
@ TODO(davidben): adrl would avoid a load, but clang's assembler does not
@ support it. We might be able to emulate it with a macro, but the macro
@ from Android's documentation did not work when I tried it.
@ https://android.googlesource.com/platform/ndk/+/refs/heads/master/docs/ClangMigration.md#arm
ldr r14,.LK256_shortcut_neon
.LK256_add_neon:
add r14,pc,r14

bic r11,r11,#15 @ align for 128-bit stores
mov r12,sp
mov sp,r11 @ alloca
Expand Down Expand Up @@ -2679,12 +2701,26 @@ sha256_block_data_order_neon:
# define INST(a,b,c,d) .byte a,b,c,d
# endif

.LK256_shortcut_armv8:
@ PC is 8 bytes ahead in Arm mode and 4 bytes ahead in Thumb mode.
#if defined(__thumb2__)
.word K256-(.LK256_add_armv8+4)
#else
.word K256-(.LK256_add_armv8+8)
#endif

.type sha256_block_data_order_armv8,%function
.align 5
sha256_block_data_order_armv8:
.LARMv8:
@ K256 is too far to reference from one ADR command in Thumb mode. In
@ Arm mode, we could make it fit by aligning the ADR offset to a 64-byte
@ boundary. For simplicity, just load the offset from .LK256_shortcut_armv8.
ldr r3,.LK256_shortcut_armv8
.LK256_add_armv8:
add r3,pc,r3

vld1.32 {q0,q1},[r0]
sub r3,r3,#256+32
add r2,r1,r2,lsl#6 @ len to point at the end of inp
b .Loop_v8

Expand Down

0 comments on commit 7600809

Please sign in to comment.