]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
9f825962 DM |
2 | /* NG4memset.S: Niagara-4 optimized memset/bzero. |
3 | * | |
4 | * Copyright (C) 2012 David S. Miller (davem@davemloft.net) | |
5 | */ | |
6 | ||
7 | #include <asm/asi.h> | |
8 | ||
/* %g2 and %g3 are written as plain scratch registers by the code below. */
	.register	%g2, #scratch
	.register	%g3, #scratch

	.text
	.align		32
	.globl		NG4memset
/*
 * NG4memset - fill a memory region with one byte value.
 *
 * In:   %o0 = destination
 *       %o1 = fill value (only the low 8 bits are used)
 *       %o2 = length in bytes
 *
 * This entry point only prepares registers and then branches forward to
 * the first local label "1", which lives inside NG4bzero:
 *       %o1 = length in bytes
 *       %o4 = fill byte replicated into all eight bytes of the register
 * %g1 and %o2 are used as temporaries while building the pattern.
 *
 * The zero test follows "andcc ... 0xff", whose result always fits in
 * eight bits, so the 32-bit and 64-bit condition codes agree here.
 */
NG4memset:
	andcc		%o1, 0xff, %o4		/* %o4 = fill byte, Z set when it is 0 */
	be,pt		%xcc, 1f		/* zero fill: %o4 already is the pattern */
	 mov		%o2, %o1		/* delay slot, runs on both paths: %o1 = length */
	sllx		%o4, 8, %g1
	or		%g1, %o4, %o2		/* %o2 = byte repeated across 16 bits */
	sllx		%o2, 16, %g1
	or		%g1, %o2, %o2		/* %o2 = byte repeated across 32 bits */
	sllx		%o2, 32, %g1
	ba,pt		%xcc, 1f		/* join the shared store code */
	 or		%g1, %o2, %o4		/* delay slot: %o4 = byte repeated across 64 bits */
	.size		NG4memset,.-NG4memset
27 | ||
/*
 * NG4bzero - zero a memory region.  The code from the first local label
 * "1" onward is also the store engine for NG4memset, which branches to
 * that label with the fill pattern already in %o4.
 *
 * In:   %o0 = destination
 *       %o1 = length in bytes
 *       %o4 = 64-bit fill pattern (only when entered at label 1;
 *             NG4bzero itself clears it)
 * Out:  %o0 = the destination pointer exactly as passed in
 * Uses: %g1, %g2, %g3, %o1, %o3, %o4, %o5 and the condition codes
 *
 * Strategy:
 *   1. Lengths of 16 bytes or less go straight to the byte loop.
 *   2. Byte stores advance the pointer to an 8-byte boundary.
 *   3. If at most 64 + (64 - 8) bytes remain, 8-byte stores are used.
 *      Above that threshold at least one whole 64-byte block is left
 *      even after up to 56 bytes of alignment stores.
 *   4. 8-byte stores advance the pointer to a 64-byte boundary.
 *   5. Whole 64-byte blocks are written with stxa through
 *      ASI_BLK_INIT_QUAD_LDD_P: two stores per block when the pattern
 *      is zero, eight stores per block otherwise.
 *      NOTE(review): the two-store zero path presumably relies on the
 *      block-init ASI zero-filling the rest of each 32-byte half;
 *      confirm against the CPU documentation.
 *   6. After a membar the tail is finished with 8-byte stores, one
 *      optional 4-byte store and the byte loop.
 *
 * The byte count is a full 64-bit quantity, so every branch that tests
 * it uses %xcc.  Testing %icc would look at the low 32 bits only: a
 * length such as 8 GiB would then compare as zero and return without
 * storing anything, and the block loops would stop early.
 */
	.align		32
	.globl		NG4bzero
NG4bzero:
	clr		%o4			/* fill pattern = 0 */
1:	cmp		%o1, 16			/* NG4memset joins here */
	ble		%xcc, .Ltiny
	 mov		%o0, %o3		/* delay slot, both paths: save dst for the return value */
	sub		%g0, %o0, %g1
	and		%g1, 0x7, %g1		/* %g1 = bytes up to the next 8-byte boundary */
	brz,pt		%g1, .Laligned8
	 sub		%o1, %g1, %o1		/* delay slot: take those bytes off the count */
1:	stb		%o4, [%o0 + 0x00]
	subcc		%g1, 1, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 1, %o0

.Laligned8:
	cmp		%o1, 64 + (64 - 8)
	ble		%xcc, .Lmedium		/* too short for the block loop */
	 sub		%g0, %o0, %g1
	andcc		%g1, (64 - 1), %g1	/* %g1 = bytes up to the next 64-byte boundary */
	brz,pn		%g1, .Laligned64
	 sub		%o1, %g1, %o1		/* delay slot: take those bytes off the count */
1:	stx		%o4, [%o0 + 0x00]
	subcc		%g1, 8, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 0x8, %o0

.Laligned64:
	andn		%o1, 64 - 1, %g1	/* %g1 = bytes covered by whole 64-byte blocks */
	sub		%o1, %g1, %o1		/* %o1 = tail bytes, fewer than 64 */
	brnz,pn		%o4, .Lnon_bzero_loop
	 mov		0x20, %g2		/* delay slot: offset of the second 32-byte half */
	/* Zero pattern: one store at offset 0 and one at 0x20 per block. */
1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	subcc		%g1, 0x40, %g1
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	bne,pt		%xcc, 1b		/* 64-bit test: the count may exceed 4 GiB */
	 add		%o0, 0x40, %o0

.Lpostloop:
	cmp		%o1, 8
	bl,pn		%xcc, .Ltiny
	 membar		#StoreStore|#StoreLoad	/* delay slot, both paths; presumably orders the
						 * block-init stores before later accesses */
.Lmedium:
	andn		%o1, 0x7, %g1		/* %g1 = bytes covered by whole 8-byte words */
	sub		%o1, %g1, %o1		/* %o1 = tail bytes, fewer than 8 */
1:	stx		%o4, [%o0 + 0x00]
	subcc		%g1, 0x8, %g1
	bne,pt		%xcc, 1b
	 add		%o0, 0x08, %o0
	andcc		%o1, 0x4, %g1		/* is a 4-byte piece left? */
	be,pt		%xcc, .Ltiny
	 sub		%o1, %g1, %o1		/* delay slot: subtracts 4 or 0 */
	stw		%o4, [%o0 + 0x00]
	add		%o0, 0x4, %o0

.Ltiny:
	cmp		%o1, 0
	be,pn		%xcc, .Lexit
	/*
	 * The subcc below is both the delay slot of the branch above and
	 * the head of the byte loop.  When the branch exits it still runs
	 * once, but %o1 is not read again after that.
	 */
1:	 subcc		%o1, 1, %o1
	stb		%o4, [%o0 + 0x00]
	bne,pt		%xcc, 1b
	 add		%o0, 1, %o0

.Lexit:
	retl
	 mov		%o3, %o0		/* delay slot: return the original destination */

	/*
	 * Non-zero pattern: all eight 8-byte words of each block are stored.
	 * %g2 = 0x20 was set in the delay slot of the brnz that jumps here.
	 * Offsets written per pass, relative to the block start:
	 *   0x00, 0x20, 0x08, 0x28, then after %o0 += 0x10:
	 *   0x10, 0x30, 0x18, 0x38.
	 */
.Lnon_bzero_loop:
	mov		0x08, %g3
	mov		0x28, %o5
1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	subcc		%g1, 0x40, %g1
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
	add		%o0, 0x10, %o0
	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
	bne,pt		%xcc, 1b		/* 64-bit test: the count may exceed 4 GiB */
	 add		%o0, 0x30, %o0		/* delay slot: 0x10 + 0x30 = one 64-byte block */
	ba,a,pt		%xcc, .Lpostloop	/* annulled: the nop below is not executed */
	 nop
	.size		NG4bzero,.-NG4bzero