]>
Commit | Line | Data |
---|---|---|
1f7e3dc0 CZ |
1 | /* |
2 | * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License version 2 as | |
6 | * published by the Free Software Foundation. | |
7 | */ | |
8 | ||
9 | #include <linux/linkage.h> | |
e6a72b7d | 10 | #include <asm/cache.h> |
1f7e3dc0 | 11 | |
e6a72b7d EP |
12 | /* |
13 | * The memset implementation below is optimized to use prefetchw and prealloc | |
14 | * instruction in case of CPU with 64B L1 data cache line (L1_CACHE_SHIFT == 6) | |
15 | * If you want to implement optimized memset for other possible L1 data cache | |
16 | * line lengths (32B and 128B) you should rewrite code carefully checking | |
17 | * we don't call any prefetchw/prealloc instruction for L1 cache lines which | |
18 | * don't belong to the memset area. | |
19 | */ | |
20 | ||
#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm

#else

; The no-op variants must still DECLARE the "reg, off" parameters:
; the call sites below always invoke these as e.g. "PREFETCHW_INSTR r0, 0",
; and GNU as rejects a macro invocation that passes more positional
; arguments than the macro declares ("too many positional arguments"),
; breaking the build whenever L1_CACHE_SHIFT != 6. The parameters are
; intentionally unused here.
.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif
1f7e3dc0 | 40 | |
;-----------------------------------------------------------------------
; void *memset(void *s, int c, size_t n)
; In:    r0 = s, r1 = c, r2 = n     (ARC calling convention — TODO confirm
;                                    against arch ABI docs)
; Out:   r0 = s (never modified; r3 is used as the running write cursor)
; Uses ARC zero-overhead hardware loops (lp_count / lpnz) and the
; prefetch/prealloc cache-hint macros defined above.
;-----------------------------------------------------------------------
ENTRY_CFI(memset)
	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location

	mov.f	0, r2		; set flags from n (Z set when n == 0)
;;; if size is zero
	jz.d	[blink]		; return; .d = delay slot executes first
	mov	r3, r0		; don't clobber ret val (r3 = write cursor)

;;; if length < 8, go straight to the byte-at-a-time tail loop
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count,r2

	;; Byte-store until r3 is word aligned (0..3 iterations).
	;; lpnz skips the loop entirely when lp_count is 0.
	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	stb.ab	r1, [r3,1]	; store byte, .ab = post-increment r3
	sub	r2, r2, 1	; one less byte remaining
.Laligndestination:

;;; Destination is aligned
	;; Splat the low byte of c across all 4 bytes of r4
	;; (and r5, so std can store an 8-byte pattern).
	and	r1, r1, 0xFF
	asl	r4, r1, 8
	or	r4, r4, r1
	asl	r5, r4, 16
	or	r5, r5, r4
	mov	r4, r5

	;; Split n into full 64-byte chunks (-> lp_count) and a
	;; remainder (-> r2). NOTE(review): sub3/add3 shift their last
	;; operand left by 3 (i.e. +/- 64 here) — confirm vs ARC ISA ref.
	sub3	lp_count, r2, 8
	cmp	r2, 64
	bmsk.hi	r2, r2, 5	; n > 64:  r2 = n & 63 (remainder bits)
	mov.ls	lp_count, 0	; n <= 64: no full 64-byte chunks
	add3.hi	r2, r2, 8	; n > 64:  fold trailing 64B into remainder

;;; Convert len to Dwords, unfold x8
	lsr.f	lp_count, lp_count, 6	; number of 64-byte iterations

	lpnz	@.Lset64bytes
	;; LOOP START — one iteration stores 64 bytes
	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching

#ifdef CONFIG_ARC_HAS_LL64
	;; 8 x 8-byte double stores
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	;; 16 x 4-byte word stores
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset64bytes:

	lsr.f	lp_count, r2, 5 ;Last remaining max 124 bytes -> 32B chunks
	lpnz	.Lset32bytes
	;; LOOP START — one iteration stores 32 bytes
#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset32bytes:

	and.f	lp_count, r2, 0x1F ;Last remaining 31 bytes, one per iteration
.Lsmallchunk:
	lpnz	.Lcopy3bytes
	;; LOOP START
	stb.ab	r1, [r3, 1]
.Lcopy3bytes:

	j	[blink]		; return; r0 still holds the original s

END_CFI(memset)
1f7e3dc0 | 140 | |
ENTRY_CFI(memzero)
	; memzero(void *s, size_t n) == memset(s, 0, n).
	; Rearrange the arguments into memset's convention, then tail-call
	; it: blink is left untouched, so memset returns directly to our
	; caller with r0 = s.
	mov	r2, r1		; n: our arg1 becomes memset's arg2
	mov	r1, 0		; fill byte = 0
	b	memset		; tail call
END_CFI(memzero)