-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmemmove-lsx.S
101 lines (88 loc) · 1.67 KB
/
memmove-lsx.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
============================================================================
Name : memmove-lsx.S
Author : hev <r@hev.cc>
Copyright : Copyright (c) 2023 hev
Description : Memmove LSX
============================================================================
*/
#include "regdef.h"
#include "lsx.h"
.text
/*
 * void *memmove(void *dst, const void *src, size_t n)
 *
 * In:
 *   a0: dst
 *   a1: src
 *   a2: n
 * Out:
 *   a0: dst (a0 is never written by this routine)
 *
 * Registers written here: t0, t1, a2, a3, a4, a5, vr0-vr9.
 *
 * Dispatch (checked in this order):
 *   dst < src  -> branch to memcpy with a0/a1/a2 untouched
 *   n < 49     -> branch to __memcpy_small with a0/a1/a2 untouched
 *   otherwise  -> backward copy below (highest addresses first)
 *
 * NOTE(review): the first two cases rely on memcpy producing a correct
 * result when dst < src and the buffers overlap, and on __memcpy_small
 * coping with any overlap. Neither routine is visible in this file, so
 * confirm against their implementations.
 *
 * Backward copy, register roles:
 *   a2  = dst + n                      (end of destination)
 *   a3  = source cursor, end of the span still to be copied
 *   a5  = destination cursor, always a multiple of 16
 *   a4  = src + chunk size, the threshold a3 is compared against
 *   t1  = (dst + n) & 15, the bytes above the aligned destination end
 *   vr8 = src[0 .. 16), vr9 = src[n-16 .. n), both loaded before any store
 *
 * On this path dst >= src, so a5 - a3 == dst - src >= 0. Every chunk is
 * fully loaded before it is stored, and each store lands on addresses at or
 * above the chunk just loaded, so a store can only overwrite source bytes
 * that are already held in registers.
 */
	.align	6
	.global	memmove
memmove:
	/*
	 * Addresses are unsigned quantities: use bltu, like every other
	 * address comparison in this routine.
	 */
	bltu	a0, a1, memcpy
	sltui	t0, a2, 49		/* t0 = (n < 49) */
	bnez	t0, __memcpy_small

	add.d	a3, a1, a2		/* a3 = src + n */
	add.d	a2, a0, a2		/* a2 = dst + n */

	/* grab the first and last 16 source bytes before anything is stored */
	vld	vr8, a1, 0
	vld	vr9, a3, -16

	/*
	 * Round the destination end down to a 16-byte boundary and pull the
	 * source cursor back by the same amount. The t1 bytes skipped at the
	 * top are written by the final vr9 store.
	 */
	andi	t1, a2, 15
	sub.d	a3, a3, t1
	sub.d	a5, a2, t1

	addi.d	a4, a1, 128
	bgeu	a4, a3, .Llt128		/* 128 bytes or fewer left: skip the loop */

	/* copy 128 bytes per iteration while more than 128 remain above src */
.Lloop128:
	vld	vr0, a3, -16
	vld	vr1, a3, -32
	vld	vr2, a3, -48
	vld	vr3, a3, -64
	vld	vr4, a3, -80
	vld	vr5, a3, -96
	vld	vr6, a3, -112
	vld	vr7, a3, -128
	addi.d	a3, a3, -128
	vst	vr0, a5, -16
	vst	vr1, a5, -32
	vst	vr2, a5, -48
	vst	vr3, a5, -64
	vst	vr4, a5, -80
	vst	vr5, a5, -96
	vst	vr6, a5, -112
	vst	vr7, a5, -128
	addi.d	a5, a5, -128
	bltu	a4, a3, .Lloop128

	/* here a3 - src <= 128; peel off 64, 32 and 16 byte chunks in turn */
.Llt128:
	addi.d	a4, a1, 64
	bgeu	a4, a3, .Llt64		/* 64 or fewer left */
	vld	vr0, a3, -16
	vld	vr1, a3, -32
	vld	vr2, a3, -48
	vld	vr3, a3, -64
	addi.d	a3, a3, -64
	vst	vr0, a5, -16
	vst	vr1, a5, -32
	vst	vr2, a5, -48
	vst	vr3, a5, -64
	addi.d	a5, a5, -64

.Llt64:
	addi.d	a4, a1, 32
	bgeu	a4, a3, .Llt32		/* 32 or fewer left */
	vld	vr0, a3, -16
	vld	vr1, a3, -32
	addi.d	a3, a3, -32
	vst	vr0, a5, -16
	vst	vr1, a5, -32
	addi.d	a5, a5, -32

.Llt32:
	addi.d	a4, a1, 16
	bgeu	a4, a3, .Llt16		/* 16 or fewer left */
	/* a3/a5 are not read again, so the cursors are left as they are */
	vld	vr0, a3, -16
	vst	vr0, a5, -16

	/*
	 * At most 16 bytes remain at the bottom: the saved head (vr8) covers
	 * them, and the saved tail (vr9) covers the t1 bytes above the aligned
	 * destination end.
	 */
.Llt16:
	vst	vr8, a0, 0		/* dst[0 .. 16) */
	vst	vr9, a2, -16		/* dst[n-16 .. n) */

	/* return; a0 still holds dst */
	jr	ra