/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __MIPS_ASM_SYNC_H__
#define __MIPS_ASM_SYNC_H__

#include <linux/stringify.h>

/*
 * sync types are defined by the MIPS64 Instruction Set documentation in Volume
 * II-A of the MIPS Architecture Reference Manual, which can be found here:
 *
 *   https://www.mips.com/?do-download=the-mips64-instruction-set-v6-06
 *
 * Two types of barrier are provided:
 *
 * 1) Completion barriers, which ensure that a memory operation has actually
 *    completed & often involve stalling the CPU pipeline to do so.
 *
 * 2) Ordering barriers, which only ensure that affected memory operations
 *    won't be reordered in the CPU pipeline in a manner that violates the
 *    restrictions imposed by the barrier.
 *
 * Ordering barriers can be more efficient than completion barriers, since:
 *
 * a) Ordering barriers only require memory access instructions which precede
 *    them in program order (older instructions) to reach a point in the
 *    load/store datapath beyond which reordering is not possible before
 *    allowing memory access instructions which follow them (younger
 *    instructions) to be performed. That is, older instructions don't
 *    actually need to complete - they just need to get far enough that all
 *    other coherent CPUs will observe their completion before they observe
 *    the effects of younger instructions.
 *
 * b) Multiple variants of ordering barrier are provided which allow the
 *    effects to be restricted to different combinations of older or younger
 *    loads or stores. By way of example, if we only care that stores older
 *    than a barrier are observed prior to stores that are younger than a
 *    barrier & don't care about the ordering of loads then the 'wmb'
 *    ordering barrier can be used. Limiting the barrier's effects to stores
 *    allows loads to continue unaffected & potentially allows the CPU to
 *    make progress faster than if younger loads had to wait for older stores
 *    to complete.
 */
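
/*
 * As an illustration of the 'wmb' case above, consider a producer that
 * fills a buffer & then sets a ready flag (a hypothetical example, not
 * code from this header). Only the two stores need to be observed in
 * order, so a store-only ordering barrier suffices:
 *
 *	buf->data = value;	// older store
 *	wmb();			// store/store ordering barrier
 *	WRITE_ONCE(ready, 1);	// younger store
 *
 * A reader polling 'ready' would pair this with a read-side (rmb) barrier
 * between its own loads.
 */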

/*
 * No sync instruction at all; used to allow code to nullify the effect of the
 * __SYNC() macro without needing lots of #ifdefery.
 */
#define __SYNC_none	-1

/*
 * A full completion barrier; all memory accesses appearing prior to this sync
 * instruction in program order must complete before any memory accesses
 * appearing after this sync instruction in program order.
 */
#define __SYNC_full	0x00

/*
 * For now we use a full completion barrier to implement all sync types, until
 * we're satisfied that lightweight ordering barriers defined by MIPSr6 are
 * sufficient to uphold our desired memory model.
 */
#define __SYNC_aq	__SYNC_full
#define __SYNC_rl	__SYNC_full
#define __SYNC_mb	__SYNC_full

/*
 * ...except on Cavium Octeon CPUs, which have been using the 'wmb' ordering
 * barrier since 2010 & omit 'rmb' barriers because the CPUs don't perform
 * speculative reads.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rmb	__SYNC_none
# define __SYNC_wmb	0x04
#else
# define __SYNC_rmb	__SYNC_full
# define __SYNC_wmb	__SYNC_full
#endif

/*
 * A GINV sync is a little different; it doesn't relate directly to loads or
 * stores, but instead causes synchronization of an icache or TLB global
 * invalidation operation triggered by the ginvi or ginvt instructions
 * respectively. In cases where we need to know that a ginvi or ginvt operation
 * has been performed by all coherent CPUs, we must issue a sync instruction of
 * this type. Once this instruction graduates all coherent CPUs will have
 * observed the invalidation.
 */
#define __SYNC_ginv	0x14
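
/*
 * A sketch of the intended usage, as illustrative pseudo-assembly (the
 * real users live elsewhere in arch/mips):
 *
 *	ginvt	$a0, 0		# begin a global TLB invalidation
 *	sync	0x14		# __SYNC_ginv: wait until all coherent CPUs
 *				# have observed the invalidation
 */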

/* Trivial; indicate that we always need this sync instruction. */
#define __SYNC_always	(1 << 0)

/*
 * Indicate that we need this sync instruction only on systems with weakly
 * ordered memory access. In general this is most MIPS systems, but there are
 * exceptions which provide strongly ordered memory.
 */
#ifdef CONFIG_WEAK_ORDERING
# define __SYNC_weak_ordering	(1 << 1)
#else
# define __SYNC_weak_ordering	0
#endif

/*
 * Indicate that we need this sync instruction only on systems where LL/SC
 * don't implicitly provide a memory barrier. In general this is most MIPS
 * systems.
 */
#ifdef CONFIG_WEAK_REORDERING_BEYOND_LLSC
# define __SYNC_weak_llsc	(1 << 2)
#else
# define __SYNC_weak_llsc	0
#endif

/*
 * Some Loongson 3 CPUs have a bug wherein execution of a memory access (load,
 * store or prefetch) in between an LL & SC can cause the SC instruction to
 * erroneously succeed, breaking atomicity. Whilst it's unusual to write code
 * containing such sequences, this bug bites harder than we might otherwise
 * expect due to reordering & speculation:
 *
 * 1) A memory access appearing prior to the LL in program order may actually
 *    be executed after the LL - this is the reordering case.
 *
 *    In order to avoid this we need to place a memory barrier (ie. a SYNC
 *    instruction) prior to every LL instruction, in between it and any
 *    earlier memory access instructions.
 *
 *    This reordering case is fixed by 3A R2 CPUs, ie. 3A2000 models and
 *    later.
 *
 * 2) If a conditional branch exists between an LL & SC with a target outside
 *    of the LL-SC loop, for example an exit upon value mismatch in cmpxchg()
 *    or similar, then misprediction of the branch may allow speculative
 *    execution of memory accesses from outside of the LL-SC loop.
 *
 *    In order to avoid this we need a memory barrier (ie. a SYNC instruction)
 *    at each affected branch target.
 *
 *    This case affects all current Loongson 3 CPUs.
 *
 * Both of the above cases lead to an error in the cache coherence protocol:
 * the Invalidate from a competing LL-SC goes 'missing', so the SC
 * erroneously observes that its core still holds the line in the Exclusive
 * state & allows the SC to proceed.
 *
 * Therefore the error only occurs on SMP systems. Barrier placement for
 * both cases is illustrated in the sketch following this block.
 */
#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS
# define __SYNC_loongson3_war	(1 << 31)
#else
# define __SYNC_loongson3_war	0
#endif
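
/*
 * The sketch below shows where the workaround barriers land, using a
 * cmpxchg-style LL/SC loop as an example (illustrative pseudo-assembly,
 * not the real kernel implementation; see cmpxchg.h & atomic.h for the
 * actual loops):
 *
 *	__SYNC(full, loongson3_war)	# case 1: before the LL
 * 1:	ll	t0, (a0)
 *	bne	t0, a1, 2f		# mispredictable exit branch
 *	move	t0, a2
 *	sc	t0, (a0)
 *	beqz	t0, 1b
 * 2:	__SYNC(full, loongson3_war)	# case 2: at the branch target
 */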

/*
 * Some Cavium Octeon CPUs suffer from a bug that causes a single wmb ordering
 * barrier to be ineffective, requiring the use of 2 in sequence to provide an
 * effective barrier as noted by commit 6b07d38aaa52 ("MIPS: Octeon: Use
 * optimized memory barrier primitives."). Here we specify that the affected
 * sync instructions should be emitted twice.
 *
 * Note that this expression is evaluated by the assembler (not the compiler),
 * and that the assembler evaluates '==' as 0 or -1, not 0 or 1.
 */
#ifdef CONFIG_CPU_CAVIUM_OCTEON
# define __SYNC_rpt(type)	(1 - (type == __SYNC_wmb))
#else
# define __SYNC_rpt(type)	1
#endif
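
/*
 * A worked example of the expression above: when type is __SYNC_wmb the
 * assembler evaluates (type == __SYNC_wmb) to -1, so the repeat count is
 * 1 - (-1) = 2 & the barrier is emitted twice. For every other type the
 * comparison evaluates to 0, giving a count of 1 - 0 = 1.
 */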

/*
 * The main event. Here we actually emit a sync instruction of a given type, if
 * reason is non-zero.
 *
 * In future we have the option of emitting entries in a fixups-style table
 * here that would allow us to opportunistically remove some sync instructions
 * when we detect at runtime that we're running on a CPU that doesn't need
 * them.
 */
#ifdef CONFIG_CPU_HAS_SYNC
# define ____SYNC(_type, _reason, _else)			\
	.if	(( _type ) != -1) && ( _reason );		\
	.set	push;						\
	.set	MIPS_ISA_LEVEL_RAW;				\
	.rept	__SYNC_rpt(_type);				\
	sync	_type;						\
	.endr;							\
	.set	pop;						\
	.else;							\
	_else;							\
	.endif
#else
# define ____SYNC(_type, _reason, _else)
#endif

/*
 * Preprocessor magic to expand macros used as arguments before we insert them
 * into assembly code.
 */
#ifdef __ASSEMBLY__
# define ___SYNC(type, reason, else)				\
	____SYNC(type, reason, else)
#else
# define ___SYNC(type, reason, else)				\
	__stringify(____SYNC(type, reason, else))
#endif

#define __SYNC(type, reason)					\
	___SYNC(__SYNC_##type, __SYNC_##reason, )
#define __SYNC_ELSE(type, reason, else)				\
	___SYNC(__SYNC_##type, __SYNC_##reason, else)
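
/*
 * Example usage (a sketch; the real call sites live in barrier.h,
 * cmpxchg.h & friends). From C code the macro expands, via __stringify(),
 * to a string that forms part of an inline assembly template:
 *
 *	asm volatile(__SYNC(mb, always) ::: "memory");
 *
 * From assembly code it expands directly to the directives, e.g. to emit
 * a sync only on configs where LL/SC provide no implicit barrier:
 *
 *	__SYNC(full, weak_llsc)
 *
 * __SYNC_ELSE() additionally emits its third argument when the sync is
 * not needed - for example a nop, where surrounding code relies upon an
 * instruction occupying that slot.
 */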

#endif /* __MIPS_ASM_SYNC_H__ */