// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * native hashtable management.
 *
 * SMP scalability work:
 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/processor.h>
#include <linux/threads.h>
#include <linux/smp.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
#include <asm/feature-fixups.h>

#include <misc/cxl-base.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif

static DEFINE_RAW_SPINLOCK(native_tlbie_lock);

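/*
 * tlbiel instruction for hash on ISA v2.06/v2.07 CPUs: only the set number
 * and the IS field are encoded in RB.
 */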
static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
{
        unsigned long rb;

        rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));

        asm volatile("tlbiel %0" : : "r" (rb));
}

/*
 * tlbiel instruction for hash, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
                                        unsigned int pid,
                                        unsigned int ric, unsigned int prs)
{
        unsigned long rb;
        unsigned long rs;
        unsigned int r = 0; /* hash format */

        rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
        rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

        asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
                     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r)
                     : "memory");
}


static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
{
        unsigned int set;

        asm volatile("ptesync": : :"memory");

        for (set = 0; set < num_sets; set++)
                tlbiel_hash_set_isa206(set, is);

        asm volatile("ptesync": : :"memory");
}

static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
        unsigned int set;

        asm volatile("ptesync": : :"memory");

        /*
         * Flush the first set of the TLB, and any caching of partition table
         * entries. Then flush the remaining sets of the TLB. Hash mode uses
         * partition scoped TLB translations.
         */
        tlbiel_hash_set_isa300(0, is, 0, 2, 0);
        for (set = 1; set < num_sets; set++)
                tlbiel_hash_set_isa300(set, is, 0, 0, 0);

        /*
         * Now invalidate the process table cache.
         *
         * From ISA v3.0B p. 1078:
         *     The following forms are invalid.
         *     * PRS=1, R=0, and RIC!=2 (The only process-scoped
         *       HPT caching is of the Process Table.)
         */
        tlbiel_hash_set_isa300(0, is, 0, 2, 1);

        asm volatile("ptesync": : :"memory");

        asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

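/*
 * Flush the entire local TLB for the requested invalidation scope,
 * dispatching on the CPU's ISA level.
 */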
void hash__tlbiel_all(unsigned int action)
{
        unsigned int is;

        switch (action) {
        case TLB_INVAL_SCOPE_GLOBAL:
                is = 3;
                break;
        case TLB_INVAL_SCOPE_LPID:
                is = 2;
                break;
        default:
                BUG();
        }

        if (early_cpu_has_feature(CPU_FTR_ARCH_300))
                tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
        else if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
                tlbiel_all_isa206(POWER8_TLB_SETS, is);
        else if (early_cpu_has_feature(CPU_FTR_ARCH_206))
                tlbiel_all_isa206(POWER7_TLB_SETS, is);
        else
                WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
}

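/*
 * Encode the VA, page size and segment size into the RB operand and issue
 * a broadcast tlbie. Returns the RB value so callers can trace it.
 */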
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
                                     int apsize, int ssize)
{
        unsigned long va;
        unsigned int penc;
        unsigned long sllp;

        /*
         * We need 14 to 65 bits of va for a tlbie of a 4K page.
         * With vpn we ignore the lower VPN_SHIFT bits already.
         * And the top two bits are already ignored because we can
         * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
         * of 12.
         */
        va = vpn << VPN_SHIFT;
        /*
         * clear top 16 bits of 64bit va, non SLS segment
         * Older versions of the architecture (2.02 and earlier) require the
         * masking of the top 16 bits.
         */
        if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
                va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                /* clear out bits after (52) [0....52.....63] */
                va &= ~((1ul << (64 - 52)) - 1);
                va |= ssize << 8;
                sllp = get_sllp_encoding(apsize);
                va |= sllp << 5;
                asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
                             : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        default:
                /* We need 14 to 14 + i bits of va */
                penc = mmu_psize_defs[psize].penc[apsize];
                va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                /*
                 * AVAL bits:
                 * We don't need all the bits, but the rest of the bits
                 * must be ignored by the processor.
                 * vpn covers up to 65 bits of va (0...65) and we need
                 * bits 58..64 of the va.
                 */
                va |= (vpn & 0xfe); /* AVAL */
                va |= 1; /* L */
                asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
                             : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        }
        return va;
}

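/*
 * Work around POWER9 tlbie issues: depending on the CPU feature flags, issue
 * an extra partition-scoped radix flush and/or repeat the tlbie, with ptesync
 * barriers so the preceding tlbie is not reordered.
 */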
static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
                                   int apsize, int ssize)
{
        if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
                /* Radix flush for a hash guest */

                unsigned long rb, rs, prs, r, ric;

                rb = PPC_BIT(52); /* IS = 2 */
                rs = 0;           /* lpid = 0 */
                prs = 0;          /* partition scoped */
                r = 1;            /* radix format */
                ric = 0;          /* RIC_FLUSH_TLB */

                /*
                 * Need the extra ptesync to make sure we don't
                 * re-order the tlbie
                 */
                asm volatile("ptesync": : :"memory");
                asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
                             : : "r"(rb), "i"(r), "i"(prs),
                               "i"(ric), "r"(rs) : "memory");
        }


        if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
                /* Need the extra ptesync to ensure we don't reorder tlbie */
                asm volatile("ptesync": : :"memory");
                ___tlbie(vpn, psize, apsize, ssize);
        }
}

static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
        unsigned long rb;

        rb = ___tlbie(vpn, psize, apsize, ssize);
        trace_tlbie(0, 0, rb, 0, 0, 0, 0);
}

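/*
 * tlbiel: same VA encoding as ___tlbie(), but only invalidates the
 * translation on the local CPU.
 */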
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
        unsigned long va;
        unsigned int penc;
        unsigned long sllp;

        /* VPN_SHIFT can be at most 12 */
        va = vpn << VPN_SHIFT;
        /*
         * clear top 16 bits of 64 bit va, non SLS segment
         * Older versions of the architecture (2.02 and earlier) require the
         * masking of the top 16 bits.
         */
        if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
                va &= ~(0xffffULL << 48);

        switch (psize) {
        case MMU_PAGE_4K:
                /* clear out bits after (52) [0....52.....63] */
                va &= ~((1ul << (64 - 52)) - 1);
                va |= ssize << 8;
                sllp = get_sllp_encoding(apsize);
                va |= sllp << 5;
                asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,0", %1)
                             : : "r" (va), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        default:
                /* We need 14 to 14 + i bits of va */
                penc = mmu_psize_defs[psize].penc[apsize];
                va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
                va |= penc << 12;
                va |= ssize << 8;
                /*
                 * AVAL bits:
                 * We don't need all the bits, but the rest of the bits
                 * must be ignored by the processor.
                 * vpn covers up to 65 bits of va (0...65) and we need
                 * bits 58..64 of the va.
                 */
                va |= (vpn & 0xfe);
                va |= 1; /* L */
                asm volatile(ASM_FTR_IFSET("tlbiel %0", "tlbiel %0,1", %1)
                             : : "r" (va), "i" (CPU_FTR_ARCH_206)
                             : "memory");
                break;
        }
        trace_tlbie(0, 1, va, 0, 0, 0, 0);

}

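/*
 * Flush one translation: use tlbiel when a CPU-local flush is allowed for
 * this page size, otherwise broadcast with tlbie, serialised by
 * native_tlbie_lock on CPUs without lockless tlbie.
 */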
static inline void tlbie(unsigned long vpn, int psize, int apsize,
                         int ssize, int local)
{
        unsigned int use_local;
        int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

        use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();

        if (use_local)
                use_local = mmu_psize_defs[psize].tlbiel;
        if (lock_tlbie && !use_local)
                raw_spin_lock(&native_tlbie_lock);
        asm volatile("ptesync": : :"memory");
        if (use_local) {
                __tlbiel(vpn, psize, apsize, ssize);
                asm volatile("ptesync": : :"memory");
        } else {
                __tlbie(vpn, psize, apsize, ssize);
                fixup_tlbie_vpn(vpn, psize, apsize, ssize);
                asm volatile("eieio; tlbsync; ptesync": : :"memory");
        }
        if (lock_tlbie && !use_local)
                raw_spin_unlock(&native_tlbie_lock);
}

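/*
 * Per-HPTE locking: atomically set HPTE_LOCK_BIT in the first doubleword of
 * the entry, spinning with low-priority hints while another CPU holds it.
 */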
static inline void native_lock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = (unsigned long *)&hptep->v;

        while (1) {
                if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
                        break;
                spin_begin();
                while (test_bit(HPTE_LOCK_BIT, word))
                        spin_cpu_relax();
                spin_end();
        }
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
        unsigned long *word = (unsigned long *)&hptep->v;

        clear_bit_unlock(HPTE_LOCK_BIT, word);
}

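/*
 * Insert an HPTE into the given group: scan the eight slots for an invalid
 * entry, take the per-HPTE lock, write the second doubleword, then publish
 * the valid bit in the first doubleword (which also drops the lock).
 */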
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
                        unsigned long pa, unsigned long rflags,
                        unsigned long vflags, int psize, int apsize, int ssize)
{
        struct hash_pte *hptep = htab_address + hpte_group;
        unsigned long hpte_v, hpte_r;
        int i;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
                        " rflags=%lx, vflags=%lx, psize=%d)\n",
                        hpte_group, vpn, pa, rflags, vflags, psize);
        }

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                if (!(be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        if (!(be64_to_cpu(hptep->v) & HPTE_V_VALID))
                                break;
                        native_unlock_hpte(hptep);
                }

                hptep++;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
        hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

        if (!(vflags & HPTE_V_BOLTED)) {
                DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
                        i, hpte_v, hpte_r);
        }

        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
                hpte_v = hpte_old_to_new_v(hpte_v);
        }

        hptep->r = cpu_to_be64(hpte_r);
        /* Guarantee the second dword is visible before the valid bit */
        eieio();
        /*
         * Now set the first dword including the valid bit
         * NOTE: this also unlocks the hpte
         */
        hptep->v = cpu_to_be64(hpte_v);

        __asm__ __volatile__ ("ptesync" : : : "memory");

        return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

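/*
 * A full group has no free slot: evict a pseudo-randomly chosen, non-bolted
 * entry so the caller can retry the insert. The TLB is deliberately not
 * flushed here; see the comments in the update/invalidate paths.
 */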
static long native_hpte_remove(unsigned long hpte_group)
{
        struct hash_pte *hptep;
        int i;
        int slot_offset;
        unsigned long hpte_v;

        DBG_LOW(" remove(group=%lx)\n", hpte_group);

        /* pick a random entry to start at */
        slot_offset = mftb() & 0x7;

        for (i = 0; i < HPTES_PER_GROUP; i++) {
                hptep = htab_address + hpte_group + slot_offset;
                hpte_v = be64_to_cpu(hptep->v);

                if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
                        /* retry with lock held */
                        native_lock_hpte(hptep);
                        hpte_v = be64_to_cpu(hptep->v);
                        if ((hpte_v & HPTE_V_VALID)
                            && !(hpte_v & HPTE_V_BOLTED))
                                break;
                        native_unlock_hpte(hptep);
                }

                slot_offset++;
                slot_offset &= 0x7;
        }

        if (i == HPTES_PER_GROUP)
                return -1;

        /* Invalidate the hpte. NOTE: this also unlocks it */
        hptep->v = 0;

        return i;
}

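/*
 * Update the protection bits of an existing mapping if the entry at the
 * given slot still matches the VPN, then flush the old translation from
 * the TLB unless the caller asked for a no-HPTE update.
 */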
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
                                 unsigned long vpn, int bpsize,
                                 int apsize, int ssize, unsigned long flags)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v, want_v;
        int ret = 0, local = 0;

        want_v = hpte_encode_avpn(vpn, bpsize, ssize);

        DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
                vpn, want_v & HPTE_V_AVPN, slot, newpp);

        hpte_v = hpte_get_old_v(hptep);
        /*
         * We need to invalidate the TLB always because hpte_remove doesn't do
         * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
         * random entry from it. When we do that we don't invalidate the TLB
         * (hpte_remove) because we assume the old translation is still
         * technically "valid".
         */
        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
                DBG_LOW(" -> miss\n");
                ret = -1;
        } else {
                native_lock_hpte(hptep);
                /* recheck with locks held */
                hpte_v = hpte_get_old_v(hptep);
                if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
                             !(hpte_v & HPTE_V_VALID))) {
                        ret = -1;
                } else {
                        DBG_LOW(" -> hit\n");
                        /* Update the HPTE */
                        hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
                                                ~(HPTE_R_PPP | HPTE_R_N)) |
                                               (newpp & (HPTE_R_PPP | HPTE_R_N |
                                                         HPTE_R_C)));
                }
                native_unlock_hpte(hptep);
        }

        if (flags & HPTE_LOCAL_UPDATE)
                local = 1;
        /*
         * Ensure it is out of the tlb too if it is not a nohpte fault
         */
        if (!(flags & HPTE_NOHPTE_UPDATE))
                tlbie(vpn, bpsize, apsize, ssize, local);

        return ret;
}

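/*
 * Look up the slot of a bolted kernel mapping by VPN. Only the primary
 * group is searched, since bolted entries are never placed in the secondary.
 */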
static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
        struct hash_pte *hptep;
        unsigned long hash;
        unsigned long i;
        long slot;
        unsigned long want_v, hpte_v;

        hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
        want_v = hpte_encode_avpn(vpn, psize, ssize);

        /* Bolted mappings are only ever in the primary group */
        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        for (i = 0; i < HPTES_PER_GROUP; i++) {

                hptep = htab_address + slot;
                hpte_v = hpte_get_old_v(hptep);
                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
                        /* HPTE matches */
                        return slot;
                ++slot;
        }

        return -1;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
                                       int psize, int ssize)
{
        unsigned long vpn;
        unsigned long vsid;
        long slot;
        struct hash_pte *hptep;

        vsid = get_kernel_vsid(ea, ssize);
        vpn = hpt_vpn(ea, vsid, ssize);

        slot = native_hpte_find(vpn, psize, ssize);
        if (slot == -1)
                panic("could not find page to bolt\n");
        hptep = htab_address + slot;

        /* Update the HPTE */
        hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
                                ~(HPTE_R_PPP | HPTE_R_N)) |
                               (newpp & (HPTE_R_PPP | HPTE_R_N)));
        /*
         * Ensure it is out of the tlb too. For bolted entries, the base
         * and actual page size are the same.
         */
        tlbie(vpn, psize, psize, ssize, 0);
}

/*
 * Remove a bolted kernel entry. Memory hotplug uses this.
 *
 * No need to lock here because we should be the only user.
 */
static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
{
        unsigned long vpn;
        unsigned long vsid;
        long slot;
        struct hash_pte *hptep;

        vsid = get_kernel_vsid(ea, ssize);
        vpn = hpt_vpn(ea, vsid, ssize);

        slot = native_hpte_find(vpn, psize, ssize);
        if (slot == -1)
                return -ENOENT;

        hptep = htab_address + slot;

        VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));

        /* Invalidate the hpte */
        hptep->v = 0;

        /* Invalidate the TLB */
        tlbie(vpn, psize, psize, ssize, 0);
        return 0;
}


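/*
 * Invalidate one HPTE if it still matches the expected VPN, then flush the
 * translation from the TLB. Runs with interrupts disabled.
 */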
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
                                   int bpsize, int apsize, int ssize, int local)
{
        struct hash_pte *hptep = htab_address + slot;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;

        local_irq_save(flags);

        DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

        want_v = hpte_encode_avpn(vpn, bpsize, ssize);
        hpte_v = hpte_get_old_v(hptep);

        if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
                native_lock_hpte(hptep);
                /* recheck with locks held */
                hpte_v = hpte_get_old_v(hptep);

                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
                        /* Invalidate the hpte. NOTE: this also unlocks it */
                        hptep->v = 0;
                else
                        native_unlock_hpte(hptep);
        }
        /*
         * We need to invalidate the TLB always because hpte_remove doesn't do
         * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
         * random entry from it. When we do that we don't invalidate the TLB
         * (hpte_remove) because we assume the old translation is still
         * technically "valid".
         */
        tlbie(vpn, bpsize, apsize, ssize, local);

        local_irq_restore(flags);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
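/*
 * Invalidate every HPTE backing a hugepage PMD: walk the per-PMD slot
 * array, drop each valid entry that still matches, and flush its
 * translation from the TLB.
 */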
static void native_hugepage_invalidate(unsigned long vsid,
                                       unsigned long addr,
                                       unsigned char *hpte_slot_array,
                                       int psize, int ssize, int local)
{
        int i;
        struct hash_pte *hptep;
        int actual_psize = MMU_PAGE_16M;
        unsigned int max_hpte_count, valid;
        unsigned long flags, s_addr = addr;
        unsigned long hpte_v, want_v, shift;
        unsigned long hidx, vpn = 0, hash, slot;

        shift = mmu_psize_defs[psize].shift;
        max_hpte_count = 1U << (PMD_SHIFT - shift);

        local_irq_save(flags);
        for (i = 0; i < max_hpte_count; i++) {
                valid = hpte_valid(hpte_slot_array, i);
                if (!valid)
                        continue;
                hidx = hpte_hash_index(hpte_slot_array, i);

                /* get the vpn */
                addr = s_addr + (i * (1ul << shift));
                vpn = hpt_vpn(addr, vsid, ssize);
                hash = hpt_hash(vpn, shift, ssize);
                if (hidx & _PTEIDX_SECONDARY)
                        hash = ~hash;

                slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                slot += hidx & _PTEIDX_GROUP_IX;

                hptep = htab_address + slot;
                want_v = hpte_encode_avpn(vpn, psize, ssize);
                hpte_v = hpte_get_old_v(hptep);

                /* Even if we miss, we need to invalidate the TLB */
                if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
                        /* recheck with locks held */
                        native_lock_hpte(hptep);
                        hpte_v = hpte_get_old_v(hptep);

                        if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
                                /*
                                 * Invalidate the hpte. NOTE: this also unlocks it
                                 */

                                hptep->v = 0;
                        } else
                                native_unlock_hpte(hptep);
                }
                /*
                 * We need to do a TLB invalidate for each address: the tlbie
                 * instruction compares the entry's VA in the TLB with the VA
                 * specified here.
                 */
                tlbie(vpn, psize, actual_psize, ssize, local);
        }
        local_irq_restore(flags);
}
#else
static void native_hugepage_invalidate(unsigned long vsid,
                                       unsigned long addr,
                                       unsigned char *hpte_slot_array,
                                       int psize, int ssize, int local)
{
        WARN(1, "%s called without THP support\n", __func__);
}
#endif

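/*
 * Work backwards from a valid HPTE and its slot number to the base and
 * actual page size, segment size and VPN it maps. Used by
 * native_hpte_clear() at kexec/crash time.
 */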
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
                        int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
        unsigned long avpn, pteg, vpi;
        unsigned long hpte_v = be64_to_cpu(hpte->v);
        unsigned long hpte_r = be64_to_cpu(hpte->r);
        unsigned long vsid, seg_off;
        int size, a_size, shift;
        /* Look at the 8 bit LP value */
        unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

        if (cpu_has_feature(CPU_FTR_ARCH_300)) {
                hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
                hpte_r = hpte_new_to_old_r(hpte_r);
        }
        if (!(hpte_v & HPTE_V_LARGE)) {
                size = MMU_PAGE_4K;
                a_size = MMU_PAGE_4K;
        } else {
                size = hpte_page_sizes[lp] & 0xf;
                a_size = hpte_page_sizes[lp] >> 4;
        }
        /* This works for all page sizes, and for 256M and 1T segments */
        *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
        shift = mmu_psize_defs[size].shift;

        avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
        pteg = slot / HPTES_PER_GROUP;
        if (hpte_v & HPTE_V_SECONDARY)
                pteg = ~pteg;

        switch (*ssize) {
        case MMU_SEGSIZE_256M:
                /* We only have 28 - 23 bits of seg_off in avpn */
                seg_off = (avpn & 0x1f) << 23;
                vsid = avpn >> 5;
                /* We can find more bits from the pteg value */
                if (shift < 23) {
                        vpi = (vsid ^ pteg) & htab_hash_mask;
                        seg_off |= vpi << shift;
                }
                *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
                break;
        case MMU_SEGSIZE_1T:
                /* We only have 40 - 23 bits of seg_off in avpn */
                seg_off = (avpn & 0x1ffff) << 23;
                vsid = avpn >> 17;
                if (shift < 23) {
                        vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
                        seg_off |= vpi << shift;
                }
                *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
                break;
        default:
                *vpn = size = 0;
        }
        *psize = size;
        *apsize = a_size;
}

/*
 * clear all mappings on kexec. All cpus are in real mode (or they will
 * be when they isi), and we are the only one left. We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * This must be called with interrupts disabled.
 *
 * Taking the native_tlbie_lock is unsafe here due to the possibility of
 * lockdep being on. On pre-POWER5 hardware, not taking the lock could
 * cause deadlock. On POWER5 and newer, not taking the lock is fine. This
 * only gets called during boot before secondary CPUs have come up and during
 * crashdump and all bets are off anyway.
 *
 * TODO: add batching support when enabled. remember, no dynamic memory here,
 * although there is the control page available...
 */
static void native_hpte_clear(void)
{
        unsigned long vpn = 0;
        unsigned long slot, slots;
        struct hash_pte *hptep = htab_address;
        unsigned long hpte_v;
        unsigned long pteg_count;
        int psize, apsize, ssize;

        pteg_count = htab_hash_mask + 1;

        slots = pteg_count * HPTES_PER_GROUP;

        for (slot = 0; slot < slots; slot++, hptep++) {
                /*
                 * we could lock the pte here, but we are the only cpu
                 * running, right? and for crash dump, we probably
                 * don't want to wait for a maybe bad cpu.
                 */
                hpte_v = be64_to_cpu(hptep->v);

                /*
                 * Call ___tlbie() here rather than tlbie() since we can't
                 * take the native_tlbie_lock.
                 */
                if (hpte_v & HPTE_V_VALID) {
                        hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
                        hptep->v = 0;
                        ___tlbie(vpn, psize, apsize, ssize);
                }
        }

        asm volatile("eieio; tlbsync; ptesync":::"memory");
}

/*
 * Batched hash table flush: we batch the tlbie's to avoid taking/releasing
 * the lock all the time.
 */
static void native_flush_hash_range(unsigned long number, int local)
{
        unsigned long vpn = 0;
        unsigned long hash, index, hidx, shift, slot;
        struct hash_pte *hptep;
        unsigned long hpte_v;
        unsigned long want_v;
        unsigned long flags;
        real_pte_t pte;
        struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
        unsigned long psize = batch->psize;
        int ssize = batch->ssize;
        int i;
        unsigned int use_local;

        use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) &&
                mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use();

        local_irq_save(flags);

        for (i = 0; i < number; i++) {
                vpn = batch->vpn[i];
                pte = batch->pte[i];

                pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
                        hash = hpt_hash(vpn, shift, ssize);
                        hidx = __rpte_to_hidx(pte, index);
                        if (hidx & _PTEIDX_SECONDARY)
                                hash = ~hash;
                        slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
                        slot += hidx & _PTEIDX_GROUP_IX;
                        hptep = htab_address + slot;
                        want_v = hpte_encode_avpn(vpn, psize, ssize);
                        hpte_v = hpte_get_old_v(hptep);

                        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
                                continue;
                        /* lock and try again */
                        native_lock_hpte(hptep);
                        hpte_v = hpte_get_old_v(hptep);

                        if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
                                native_unlock_hpte(hptep);
                        else
                                hptep->v = 0;

                } pte_iterate_hashed_end();
        }

        if (use_local) {
                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        vpn = batch->vpn[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize,
                                                    vpn, index, shift) {
                                __tlbiel(vpn, psize, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                asm volatile("ptesync":::"memory");
        } else {
                int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

                if (lock_tlbie)
                        raw_spin_lock(&native_tlbie_lock);

                asm volatile("ptesync":::"memory");
                for (i = 0; i < number; i++) {
                        vpn = batch->vpn[i];
                        pte = batch->pte[i];

                        pte_iterate_hashed_subpages(pte, psize,
                                                    vpn, index, shift) {
                                __tlbie(vpn, psize, psize, ssize);
                        } pte_iterate_hashed_end();
                }
                /*
                 * Just do one more with the last used values.
                 */
                fixup_tlbie_vpn(vpn, psize, psize, ssize);
                asm volatile("eieio; tlbsync; ptesync":::"memory");

                if (lock_tlbie)
                        raw_spin_unlock(&native_tlbie_lock);
        }

        local_irq_restore(flags);
}

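/*
 * Wire up the hash MMU ops for bare-metal (native) operation.
 */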
void __init hpte_init_native(void)
{
        mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
        mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
        mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
        mmu_hash_ops.hpte_removebolted = native_hpte_removebolted;
        mmu_hash_ops.hpte_insert = native_hpte_insert;
        mmu_hash_ops.hpte_remove = native_hpte_remove;
        mmu_hash_ops.hpte_clear_all = native_hpte_clear;
        mmu_hash_ops.flush_hash_range = native_flush_hash_range;
        mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
}