-
Notifications
You must be signed in to change notification settings - Fork 12
/
memset.S
97 lines (90 loc) · 1.91 KB
/
memset.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#if 0
/*
 * Reference C implementation written with RVV intrinsics (this region is
 * compiled out by the enclosing #if 0).
 *
 * Fills len bytes starting at dst with the low 8 bits of n and returns dst.
 * The fill value is splatted once into an LMUL=8 register group; each pass
 * then asks the hardware how many bytes it will handle (vl) and stores that
 * many, until nothing is left.
 */
void *memset(void *dst, int n, size_t len) {
    unsigned char *out = dst;
    vuint8m8_t fill = __riscv_vmv_v_x_u8m8((uint8_t)n, __riscv_vsetvlmax_e8m8());
    while (len > 0) {
        size_t vl = __riscv_vsetvl_e8m8(len); /* bytes handled in this pass */
        __riscv_vse8_v_u8m8(out, fill, vl);
        out += vl;
        len -= vl;
    }
    return dst;
}
#endif
#ifdef MX
/*
 * void *MX(memset_rvv_)(void *dst, int c, size_t len)
 *
 * Plain strip-mined fill: every pass requests the remaining length from
 * vsetvli and stores however many bytes (vl) were granted.
 *
 * In:      a0 = dst, a1 = fill byte, a2 = len
 * Out:     a0 = dst (never modified)
 * Scratch: a1 (reused as write cursor), a2, t0, v8 register group
 */
.global MX(memset_rvv_)
MX(memset_rvv_):
    vsetvli t0, zero, e8, MX(), ta, ma  # vl = VLMAX so the splat covers the whole group
    vmv.v.x v8, a1                      # v8[i] = fill byte
    mv      a1, a0                      # a1 = write cursor, a0 stays as return value
1:
    vsetvli t0, a2, e8, MX(), ta, ma    # t0 = bytes granted for this pass
    vse8.v  v8, (a1)
    add     a1, a1, t0                  # cursor += vl
    sub     a2, a2, t0                  # remaining -= vl
    bnez    a2, 1b
    ret
/*
 * void *MX(memset_rvv_align_)(void *dst, int c, size_t len)
 *
 * Strip-mined fill that first brings the write cursor up to a VLMAX-byte
 * boundary, where VLMAX is the element count vsetvli grants for e8 at the
 * selected LMUL. The head store writes (-dst) & (VLMAX - 1) bytes; the mask
 * is valid because VLMAX is a power of two. Fills shorter than VLMAX skip
 * the alignment step and go straight to the generic loop.
 *
 * In:      a0 = dst, a1 = fill byte, a2 = len
 * Out:     a0 = dst (never modified)
 * Scratch: a1 (reused as write cursor), a2, t0, t1, t2, v8 register group
 */
.global MX(memset_rvv_align_)
MX(memset_rvv_align_):
    vsetvli t0, zero, e8, MX(), ta, ma  # t0 = VLMAX (bytes per full store)
    vmv.v.x v8, a1                      # v8[i] = fill byte
    mv      a1, a0                      # a1 = write cursor, a0 stays as return value
    bltu    a2, t0, 2f                  # len < VLMAX: no alignment pass
    /* head = (-dst) & (VLMAX - 1): bytes needed to reach the next boundary */
    neg     t1, a0
    addi    t2, t0, -1
    and     t1, t1, t2
    vsetvli t0, t1, e8, MX(), ta, ma    # head < VLMAX, so vl = head (may be 0)
1:
    vse8.v  v8, (a1)                    # stores vl bytes; nothing when vl = 0
    sub     a2, a2, t0                  # remaining -= vl
    add     a1, a1, t0                  # cursor += vl
2:
    vsetvli t0, a2, e8, MX(), ta, ma    # vl for the next pass
    bnez    a2, 1b
    ret
/*
 * void *MX(memset_rvv_tail_)(void *dst, int c, size_t len)
 *
 * Two-phase fill. vl is configured once from len and the bulk loop stores
 * that many bytes per pass without re-running vsetvli; a single shorter
 * store then writes the remainder (tail = len % vl).
 *
 * len == 0: the first vsetvli grants vl = 0; remu with a zero divisor
 * returns the dividend, so tail = 0 and every store writes nothing.
 *
 * In:      a0 = dst, a1 = fill byte, a2 = len
 * Out:     a0 = dst (never modified)
 * Scratch: a2, a5, t0, t1, t2, v8 register group
 */
.global MX(memset_rvv_tail_)
MX(memset_rvv_tail_):
    vsetvli t0, a2, e8, MX(), ta, ma    # t0 = vl granted for len (bulk step)
    vmv.v.x v8, a1                      # v8[i] = fill byte
    mv      a5, a0                      # a5 = write cursor
    remu    t1, a2, t0                  # t1 = tail = len % step
    sub     a2, a2, t1                  # a2 = bulk byte count (multiple of step)
    add     t2, a0, a2                  # t2 = end of bulk region
1:
    vse8.v  v8, (a5)
    add     a5, a5, t0                  # cursor += step
    bltu    a5, t2, 1b                  # until cursor reaches end of bulk
    /* remainder: tail < step, so one store with vl = tail finishes the job */
    vsetvli zero, t1, e8, MX(), ta, ma
    vse8.v  v8, (a5)
    ret
/*
 * void *MX(memset_rvv_tail_4x_)(void *dst, int c, size_t len)
 *
 * Fill with a four-way unrolled bulk loop followed by a strip-mined tail.
 * The bulk loop writes 4 * step bytes per iteration (step = vl granted for
 * len) without re-running vsetvli; whatever is left, or the whole request
 * when len < 4 * step, is handled by the tail loop, which asks vsetvli for
 * a fresh vl on every pass.
 *
 * len == 0: step is 0, remu by zero returns the dividend (0), and all
 * stores execute with vl = 0, writing nothing.
 *
 * In:      a0 = dst, a1 = fill byte, a2 = len
 * Out:     a0 = dst (never modified)
 * Scratch: a2, a3, a5, t0, t1, t2, t3, v8 register group
 */
.global MX(memset_rvv_tail_4x_)
MX(memset_rvv_tail_4x_):
    vsetvli t0, a2, e8, MX(), ta, ma    # t0 = step = vl granted for len
    vmv.v.x v8, a1                      # v8[i] = fill byte
    mv      a3, a2                      # a3 = bytes left for the tail loop
    mv      a5, a0                      # a5 = write cursor
    slli    t1, t0, 2                   # t1 = 4 * step, bytes per bulk iteration
    bltu    a2, t1, 2f                  # too short for one bulk iteration
    remu    a3, a2, t1                  # a3 = tail = len % (4 * step)
    sub     a2, a2, a3                  # a2 = bulk byte count
    add     t2, a0, a2                  # t2 = end of bulk region
1:
    .rept   4
    vse8.v  v8, (a5)
    add     a5, a5, t0                  # cursor += step
    .endr
    bltu    a5, t2, 1b                  # until cursor reaches end of bulk
    /* tail: runs at least once; a pass with vl = 0 stores nothing */
2:
    vsetvli t3, a3, e8, MX(), ta, ma    # t3 = bytes granted for this pass
    vse8.v  v8, (a5)
    add     a5, a5, t3                  # cursor += vl
    sub     a3, a3, t3                  # left -= vl
    bnez    a3, 2b
    ret
#endif