]>
Commit | Line | Data |
---|---|---|
1 | // SPDX-License-Identifier: GPL-2.0-or-later | |
2 | /* | |
3 | * pSeries_lpar.c | |
4 | * Copyright (C) 2001 Todd Inglett, IBM Corporation | |
5 | * | |
6 | * pSeries LPAR support. | |
7 | */ | |
8 | ||
9 | /* Enables debugging of low-level hash table routines - careful! */ | |
10 | #undef DEBUG | |
11 | #define pr_fmt(fmt) "lpar: " fmt | |
12 | ||
13 | #include <linux/kernel.h> | |
14 | #include <linux/dma-mapping.h> | |
15 | #include <linux/console.h> | |
16 | #include <linux/export.h> | |
17 | #include <linux/jump_label.h> | |
18 | #include <linux/delay.h> | |
19 | #include <linux/stop_machine.h> | |
20 | #include <asm/processor.h> | |
21 | #include <asm/mmu.h> | |
22 | #include <asm/page.h> | |
23 | #include <asm/pgtable.h> | |
24 | #include <asm/machdep.h> | |
25 | #include <asm/mmu_context.h> | |
26 | #include <asm/iommu.h> | |
27 | #include <asm/tlb.h> | |
28 | #include <asm/prom.h> | |
29 | #include <asm/cputable.h> | |
30 | #include <asm/udbg.h> | |
31 | #include <asm/smp.h> | |
32 | #include <asm/trace.h> | |
33 | #include <asm/firmware.h> | |
34 | #include <asm/plpar_wrappers.h> | |
35 | #include <asm/kexec.h> | |
36 | #include <asm/fadump.h> | |
37 | #include <asm/asm-prototypes.h> | |
38 | #include <asm/debugfs.h> | |
39 | ||
40 | #include "pseries.h" | |
41 | ||
/*
 * Flag bits for H_BULK_REMOVE.
 *
 * They are OR-ed into the per-entry parameter words built by the
 * invalidation paths in this file (HBR_REQUEST is also used when building
 * H_BLOCK_REMOVE entries, and HBR_END terminates a partially filled list).
 */
#define HBR_REQUEST	0x4000000000000000UL
#define HBR_RESPONSE	0x8000000000000000UL
#define HBR_END		0xc000000000000000UL
#define HBR_AVPN	0x0200000000000000UL
#define HBR_ANDCOND	0x0100000000000000UL
48 | ||
49 | ||
50 | /* in hvCall.S */ | |
51 | EXPORT_SYMBOL(plpar_hcall); | |
52 | EXPORT_SYMBOL(plpar_hcall9); | |
53 | EXPORT_SYMBOL(plpar_hcall_norets); | |
54 | ||
55 | void vpa_init(int cpu) | |
56 | { | |
57 | int hwcpu = get_hard_smp_processor_id(cpu); | |
58 | unsigned long addr; | |
59 | long ret; | |
60 | struct paca_struct *pp; | |
61 | struct dtl_entry *dtl; | |
62 | ||
63 | /* | |
64 | * The spec says it "may be problematic" if CPU x registers the VPA of | |
65 | * CPU y. We should never do that, but wail if we ever do. | |
66 | */ | |
67 | WARN_ON(cpu != smp_processor_id()); | |
68 | ||
69 | if (cpu_has_feature(CPU_FTR_ALTIVEC)) | |
70 | lppaca_of(cpu).vmxregs_in_use = 1; | |
71 | ||
72 | if (cpu_has_feature(CPU_FTR_ARCH_207S)) | |
73 | lppaca_of(cpu).ebb_regs_in_use = 1; | |
74 | ||
75 | addr = __pa(&lppaca_of(cpu)); | |
76 | ret = register_vpa(hwcpu, addr); | |
77 | ||
78 | if (ret) { | |
79 | pr_err("WARNING: VPA registration for cpu %d (hw %d) of area " | |
80 | "%lx failed with %ld\n", cpu, hwcpu, addr, ret); | |
81 | return; | |
82 | } | |
83 | ||
84 | #ifdef CONFIG_PPC_BOOK3S_64 | |
85 | /* | |
86 | * PAPR says this feature is SLB-Buffer but firmware never | |
87 | * reports that. All SPLPAR support SLB shadow buffer. | |
88 | */ | |
89 | if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) { | |
90 | addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr); | |
91 | ret = register_slb_shadow(hwcpu, addr); | |
92 | if (ret) | |
93 | pr_err("WARNING: SLB shadow buffer registration for " | |
94 | "cpu %d (hw %d) of area %lx failed with %ld\n", | |
95 | cpu, hwcpu, addr, ret); | |
96 | } | |
97 | #endif /* CONFIG_PPC_BOOK3S_64 */ | |
98 | ||
99 | /* | |
100 | * Register dispatch trace log, if one has been allocated. | |
101 | */ | |
102 | pp = paca_ptrs[cpu]; | |
103 | dtl = pp->dispatch_log; | |
104 | if (dtl) { | |
105 | pp->dtl_ridx = 0; | |
106 | pp->dtl_curr = dtl; | |
107 | lppaca_of(cpu).dtl_idx = 0; | |
108 | ||
109 | /* hypervisor reads buffer length from this field */ | |
110 | dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES); | |
111 | ret = register_dtl(hwcpu, __pa(dtl)); | |
112 | if (ret) | |
113 | pr_err("WARNING: DTL registration of cpu %d (hw %d) " | |
114 | "failed with %ld\n", smp_processor_id(), | |
115 | hwcpu, ret); | |
116 | lppaca_of(cpu).dtl_enable_mask = 2; | |
117 | } | |
118 | } | |
119 | ||
120 | #ifdef CONFIG_PPC_BOOK3S_64 | |
121 | ||
122 | static long pSeries_lpar_hpte_insert(unsigned long hpte_group, | |
123 | unsigned long vpn, unsigned long pa, | |
124 | unsigned long rflags, unsigned long vflags, | |
125 | int psize, int apsize, int ssize) | |
126 | { | |
127 | unsigned long lpar_rc; | |
128 | unsigned long flags; | |
129 | unsigned long slot; | |
130 | unsigned long hpte_v, hpte_r; | |
131 | ||
132 | if (!(vflags & HPTE_V_BOLTED)) | |
133 | pr_devel("hpte_insert(group=%lx, vpn=%016lx, " | |
134 | "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n", | |
135 | hpte_group, vpn, pa, rflags, vflags, psize); | |
136 | ||
137 | hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; | |
138 | hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; | |
139 | ||
140 | if (!(vflags & HPTE_V_BOLTED)) | |
141 | pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); | |
142 | ||
143 | /* Now fill in the actual HPTE */ | |
144 | /* Set CEC cookie to 0 */ | |
145 | /* Zero page = 0 */ | |
146 | /* I-cache Invalidate = 0 */ | |
147 | /* I-cache synchronize = 0 */ | |
148 | /* Exact = 0 */ | |
149 | flags = 0; | |
150 | ||
151 | if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) | |
152 | flags |= H_COALESCE_CAND; | |
153 | ||
154 | lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); | |
155 | if (unlikely(lpar_rc == H_PTEG_FULL)) { | |
156 | pr_devel("Hash table group is full\n"); | |
157 | return -1; | |
158 | } | |
159 | ||
160 | /* | |
161 | * Since we try and ioremap PHBs we don't own, the pte insert | |
162 | * will fail. However we must catch the failure in hash_page | |
163 | * or we will loop forever, so return -2 in this case. | |
164 | */ | |
165 | if (unlikely(lpar_rc != H_SUCCESS)) { | |
166 | pr_err("Failed hash pte insert with error %ld\n", lpar_rc); | |
167 | return -2; | |
168 | } | |
169 | if (!(vflags & HPTE_V_BOLTED)) | |
170 | pr_devel(" -> slot: %lu\n", slot & 7); | |
171 | ||
172 | /* Because of iSeries, we have to pass down the secondary | |
173 | * bucket bit here as well | |
174 | */ | |
175 | return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3); | |
176 | } | |
177 | ||
178 | static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock); | |
179 | ||
180 | static long pSeries_lpar_hpte_remove(unsigned long hpte_group) | |
181 | { | |
182 | unsigned long slot_offset; | |
183 | unsigned long lpar_rc; | |
184 | int i; | |
185 | unsigned long dummy1, dummy2; | |
186 | ||
187 | /* pick a random slot to start at */ | |
188 | slot_offset = mftb() & 0x7; | |
189 | ||
190 | for (i = 0; i < HPTES_PER_GROUP; i++) { | |
191 | ||
192 | /* don't remove a bolted entry */ | |
193 | lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset, | |
194 | (0x1UL << 4), &dummy1, &dummy2); | |
195 | if (lpar_rc == H_SUCCESS) | |
196 | return i; | |
197 | ||
198 | /* | |
199 | * The test for adjunct partition is performed before the | |
200 | * ANDCOND test. H_RESOURCE may be returned, so we need to | |
201 | * check for that as well. | |
202 | */ | |
203 | BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE); | |
204 | ||
205 | slot_offset++; | |
206 | slot_offset &= 0x7; | |
207 | } | |
208 | ||
209 | return -1; | |
210 | } | |
211 | ||
212 | static void manual_hpte_clear_all(void) | |
213 | { | |
214 | unsigned long size_bytes = 1UL << ppc64_pft_size; | |
215 | unsigned long hpte_count = size_bytes >> 4; | |
216 | struct { | |
217 | unsigned long pteh; | |
218 | unsigned long ptel; | |
219 | } ptes[4]; | |
220 | long lpar_rc; | |
221 | unsigned long i, j; | |
222 | ||
223 | /* Read in batches of 4, | |
224 | * invalidate only valid entries not in the VRMA | |
225 | * hpte_count will be a multiple of 4 | |
226 | */ | |
227 | for (i = 0; i < hpte_count; i += 4) { | |
228 | lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes); | |
229 | if (lpar_rc != H_SUCCESS) { | |
230 | pr_info("Failed to read hash page table at %ld err %ld\n", | |
231 | i, lpar_rc); | |
232 | continue; | |
233 | } | |
234 | for (j = 0; j < 4; j++){ | |
235 | if ((ptes[j].pteh & HPTE_V_VRMA_MASK) == | |
236 | HPTE_V_VRMA_MASK) | |
237 | continue; | |
238 | if (ptes[j].pteh & HPTE_V_VALID) | |
239 | plpar_pte_remove_raw(0, i + j, 0, | |
240 | &(ptes[j].pteh), &(ptes[j].ptel)); | |
241 | } | |
242 | } | |
243 | } | |
244 | ||
245 | static int hcall_hpte_clear_all(void) | |
246 | { | |
247 | int rc; | |
248 | ||
249 | do { | |
250 | rc = plpar_hcall_norets(H_CLEAR_HPT); | |
251 | } while (rc == H_CONTINUE); | |
252 | ||
253 | return rc; | |
254 | } | |
255 | ||
256 | static void pseries_hpte_clear_all(void) | |
257 | { | |
258 | int rc; | |
259 | ||
260 | rc = hcall_hpte_clear_all(); | |
261 | if (rc != H_SUCCESS) | |
262 | manual_hpte_clear_all(); | |
263 | ||
264 | #ifdef __LITTLE_ENDIAN__ | |
265 | /* | |
266 | * Reset exceptions to big endian. | |
267 | * | |
268 | * FIXME this is a hack for kexec, we need to reset the exception | |
269 | * endian before starting the new kernel and this is a convenient place | |
270 | * to do it. | |
271 | * | |
272 | * This is also called on boot when a fadump happens. In that case we | |
273 | * must not change the exception endian mode. | |
274 | */ | |
275 | if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active()) | |
276 | pseries_big_endian_exceptions(); | |
277 | #endif | |
278 | } | |
279 | ||
280 | /* | |
281 | * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and | |
282 | * the low 3 bits of flags happen to line up. So no transform is needed. | |
283 | * We can probably optimize here and assume the high bits of newpp are | |
284 | * already zero. For now I am paranoid. | |
285 | */ | |
286 | static long pSeries_lpar_hpte_updatepp(unsigned long slot, | |
287 | unsigned long newpp, | |
288 | unsigned long vpn, | |
289 | int psize, int apsize, | |
290 | int ssize, unsigned long inv_flags) | |
291 | { | |
292 | unsigned long lpar_rc; | |
293 | unsigned long flags; | |
294 | unsigned long want_v; | |
295 | ||
296 | want_v = hpte_encode_avpn(vpn, psize, ssize); | |
297 | ||
298 | flags = (newpp & 7) | H_AVPN; | |
299 | if (mmu_has_feature(MMU_FTR_KERNEL_RO)) | |
300 | /* Move pp0 into bit 8 (IBM 55) */ | |
301 | flags |= (newpp & HPTE_R_PP0) >> 55; | |
302 | ||
303 | pr_devel(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", | |
304 | want_v, slot, flags, psize); | |
305 | ||
306 | lpar_rc = plpar_pte_protect(flags, slot, want_v); | |
307 | ||
308 | if (lpar_rc == H_NOT_FOUND) { | |
309 | pr_devel("not found !\n"); | |
310 | return -1; | |
311 | } | |
312 | ||
313 | pr_devel("ok\n"); | |
314 | ||
315 | BUG_ON(lpar_rc != H_SUCCESS); | |
316 | ||
317 | return 0; | |
318 | } | |
319 | ||
320 | static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_group) | |
321 | { | |
322 | long lpar_rc; | |
323 | unsigned long i, j; | |
324 | struct { | |
325 | unsigned long pteh; | |
326 | unsigned long ptel; | |
327 | } ptes[4]; | |
328 | ||
329 | for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) { | |
330 | ||
331 | lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes); | |
332 | if (lpar_rc != H_SUCCESS) { | |
333 | pr_info("Failed to read hash page table at %ld err %ld\n", | |
334 | hpte_group, lpar_rc); | |
335 | continue; | |
336 | } | |
337 | ||
338 | for (j = 0; j < 4; j++) { | |
339 | if (HPTE_V_COMPARE(ptes[j].pteh, want_v) && | |
340 | (ptes[j].pteh & HPTE_V_VALID)) | |
341 | return i + j; | |
342 | } | |
343 | } | |
344 | ||
345 | return -1; | |
346 | } | |
347 | ||
348 | static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize) | |
349 | { | |
350 | long slot; | |
351 | unsigned long hash; | |
352 | unsigned long want_v; | |
353 | unsigned long hpte_group; | |
354 | ||
355 | hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize); | |
356 | want_v = hpte_encode_avpn(vpn, psize, ssize); | |
357 | ||
358 | /* Bolted entries are always in the primary group */ | |
359 | hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP; | |
360 | slot = __pSeries_lpar_hpte_find(want_v, hpte_group); | |
361 | if (slot < 0) | |
362 | return -1; | |
363 | return hpte_group + slot; | |
364 | } | |
365 | ||
366 | static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp, | |
367 | unsigned long ea, | |
368 | int psize, int ssize) | |
369 | { | |
370 | unsigned long vpn; | |
371 | unsigned long lpar_rc, slot, vsid, flags; | |
372 | ||
373 | vsid = get_kernel_vsid(ea, ssize); | |
374 | vpn = hpt_vpn(ea, vsid, ssize); | |
375 | ||
376 | slot = pSeries_lpar_hpte_find(vpn, psize, ssize); | |
377 | BUG_ON(slot == -1); | |
378 | ||
379 | flags = newpp & 7; | |
380 | if (mmu_has_feature(MMU_FTR_KERNEL_RO)) | |
381 | /* Move pp0 into bit 8 (IBM 55) */ | |
382 | flags |= (newpp & HPTE_R_PP0) >> 55; | |
383 | ||
384 | lpar_rc = plpar_pte_protect(flags, slot, 0); | |
385 | ||
386 | BUG_ON(lpar_rc != H_SUCCESS); | |
387 | } | |
388 | ||
389 | static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, | |
390 | int psize, int apsize, | |
391 | int ssize, int local) | |
392 | { | |
393 | unsigned long want_v; | |
394 | unsigned long lpar_rc; | |
395 | unsigned long dummy1, dummy2; | |
396 | ||
397 | pr_devel(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n", | |
398 | slot, vpn, psize, local); | |
399 | ||
400 | want_v = hpte_encode_avpn(vpn, psize, ssize); | |
401 | lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); | |
402 | if (lpar_rc == H_NOT_FOUND) | |
403 | return; | |
404 | ||
405 | BUG_ON(lpar_rc != H_SUCCESS); | |
406 | } | |
407 | ||
408 | ||
/*
 * H_BLOCK_REMOVE request flag and per-entry return codes, as defined in
 * the PAPR's section 14.5.4.1.8.
 *
 * HBLKR_CTRL_MASK selects the control field of a returned entry; it does
 * not include the reference and change bits returned from the processed
 * PTE, so masked values can be compared directly against the
 * HBLKR_CTRL_* codes.
 */
#define HBLKR_AVPN		0x0100000000000000UL
#define HBLKR_CTRL_MASK		0xf800000000000000UL
#define HBLKR_CTRL_SUCCESS	0x8000000000000000UL
#define HBLKR_CTRL_ERRNOTFOUND	0x8800000000000000UL
#define HBLKR_CTRL_ERRBUSY	0xa000000000000000UL
419 | ||
420 | /** | |
421 | * H_BLOCK_REMOVE caller. | |
422 | * @idx should point to the latest @param entry set with a PTEX. | |
423 | * If PTE cannot be processed because another CPUs has already locked that | |
424 | * group, those entries are put back in @param starting at index 1. | |
425 | * If entries has to be retried and @retry_busy is set to true, these entries | |
426 | * are retried until success. If @retry_busy is set to false, the returned | |
427 | * is the number of entries yet to process. | |
428 | */ | |
429 | static unsigned long call_block_remove(unsigned long idx, unsigned long *param, | |
430 | bool retry_busy) | |
431 | { | |
432 | unsigned long i, rc, new_idx; | |
433 | unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; | |
434 | ||
435 | if (idx < 2) { | |
436 | pr_warn("Unexpected empty call to H_BLOCK_REMOVE"); | |
437 | return 0; | |
438 | } | |
439 | again: | |
440 | new_idx = 0; | |
441 | if (idx > PLPAR_HCALL9_BUFSIZE) { | |
442 | pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx); | |
443 | idx = PLPAR_HCALL9_BUFSIZE; | |
444 | } else if (idx < PLPAR_HCALL9_BUFSIZE) | |
445 | param[idx] = HBR_END; | |
446 | ||
447 | rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf, | |
448 | param[0], /* AVA */ | |
449 | param[1], param[2], param[3], param[4], /* TS0-7 */ | |
450 | param[5], param[6], param[7], param[8]); | |
451 | if (rc == H_SUCCESS) | |
452 | return 0; | |
453 | ||
454 | BUG_ON(rc != H_PARTIAL); | |
455 | ||
456 | /* Check that the unprocessed entries were 'not found' or 'busy' */ | |
457 | for (i = 0; i < idx-1; i++) { | |
458 | unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK; | |
459 | ||
460 | if (ctrl == HBLKR_CTRL_ERRBUSY) { | |
461 | param[++new_idx] = param[i+1]; | |
462 | continue; | |
463 | } | |
464 | ||
465 | BUG_ON(ctrl != HBLKR_CTRL_SUCCESS | |
466 | && ctrl != HBLKR_CTRL_ERRNOTFOUND); | |
467 | } | |
468 | ||
469 | /* | |
470 | * If there were entries found busy, retry these entries if requested, | |
471 | * of if all the entries have to be retried. | |
472 | */ | |
473 | if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) { | |
474 | idx = new_idx + 1; | |
475 | goto again; | |
476 | } | |
477 | ||
478 | return new_idx; | |
479 | } | |
480 | ||
481 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | |
482 | /* | |
483 | * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need | |
484 | * to make sure that we avoid bouncing the hypervisor tlbie lock. | |
485 | */ | |
486 | #define PPC64_HUGE_HPTE_BATCH 12 | |
487 | ||
488 | static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn, | |
489 | int count, int psize, int ssize) | |
490 | { | |
491 | unsigned long param[PLPAR_HCALL9_BUFSIZE]; | |
492 | unsigned long shift, current_vpgb, vpgb; | |
493 | int i, pix = 0; | |
494 | ||
495 | shift = mmu_psize_defs[psize].shift; | |
496 | ||
497 | for (i = 0; i < count; i++) { | |
498 | /* | |
499 | * Shifting 3 bits more on the right to get a | |
500 | * 8 pages aligned virtual addresse. | |
501 | */ | |
502 | vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3)); | |
503 | if (!pix || vpgb != current_vpgb) { | |
504 | /* | |
505 | * Need to start a new 8 pages block, flush | |
506 | * the current one if needed. | |
507 | */ | |
508 | if (pix) | |
509 | (void)call_block_remove(pix, param, true); | |
510 | current_vpgb = vpgb; | |
511 | param[0] = hpte_encode_avpn(vpn[i], psize, ssize); | |
512 | pix = 1; | |
513 | } | |
514 | ||
515 | param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i]; | |
516 | if (pix == PLPAR_HCALL9_BUFSIZE) { | |
517 | pix = call_block_remove(pix, param, false); | |
518 | /* | |
519 | * pix = 0 means that all the entries were | |
520 | * removed, we can start a new block. | |
521 | * Otherwise, this means that there are entries | |
522 | * to retry, and pix points to latest one, so | |
523 | * we should increment it and try to continue | |
524 | * the same block. | |
525 | */ | |
526 | if (pix) | |
527 | pix++; | |
528 | } | |
529 | } | |
530 | if (pix) | |
531 | (void)call_block_remove(pix, param, true); | |
532 | } | |
533 | ||
534 | static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn, | |
535 | int count, int psize, int ssize) | |
536 | { | |
537 | unsigned long param[PLPAR_HCALL9_BUFSIZE]; | |
538 | int i = 0, pix = 0, rc; | |
539 | ||
540 | for (i = 0; i < count; i++) { | |
541 | ||
542 | if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { | |
543 | pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0, | |
544 | ssize, 0); | |
545 | } else { | |
546 | param[pix] = HBR_REQUEST | HBR_AVPN | slot[i]; | |
547 | param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize); | |
548 | pix += 2; | |
549 | if (pix == 8) { | |
550 | rc = plpar_hcall9(H_BULK_REMOVE, param, | |
551 | param[0], param[1], param[2], | |
552 | param[3], param[4], param[5], | |
553 | param[6], param[7]); | |
554 | BUG_ON(rc != H_SUCCESS); | |
555 | pix = 0; | |
556 | } | |
557 | } | |
558 | } | |
559 | if (pix) { | |
560 | param[pix] = HBR_END; | |
561 | rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], | |
562 | param[2], param[3], param[4], param[5], | |
563 | param[6], param[7]); | |
564 | BUG_ON(rc != H_SUCCESS); | |
565 | } | |
566 | } | |
567 | ||
568 | static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot, | |
569 | unsigned long *vpn, | |
570 | int count, int psize, | |
571 | int ssize) | |
572 | { | |
573 | unsigned long flags = 0; | |
574 | int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); | |
575 | ||
576 | if (lock_tlbie) | |
577 | spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); | |
578 | ||
579 | if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) | |
580 | hugepage_block_invalidate(slot, vpn, count, psize, ssize); | |
581 | else | |
582 | hugepage_bulk_invalidate(slot, vpn, count, psize, ssize); | |
583 | ||
584 | if (lock_tlbie) | |
585 | spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); | |
586 | } | |
587 | ||
588 | static void pSeries_lpar_hugepage_invalidate(unsigned long vsid, | |
589 | unsigned long addr, | |
590 | unsigned char *hpte_slot_array, | |
591 | int psize, int ssize, int local) | |
592 | { | |
593 | int i, index = 0; | |
594 | unsigned long s_addr = addr; | |
595 | unsigned int max_hpte_count, valid; | |
596 | unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; | |
597 | unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; | |
598 | unsigned long shift, hidx, vpn = 0, hash, slot; | |
599 | ||
600 | shift = mmu_psize_defs[psize].shift; | |
601 | max_hpte_count = 1U << (PMD_SHIFT - shift); | |
602 | ||
603 | for (i = 0; i < max_hpte_count; i++) { | |
604 | valid = hpte_valid(hpte_slot_array, i); | |
605 | if (!valid) | |
606 | continue; | |
607 | hidx = hpte_hash_index(hpte_slot_array, i); | |
608 | ||
609 | /* get the vpn */ | |
610 | addr = s_addr + (i * (1ul << shift)); | |
611 | vpn = hpt_vpn(addr, vsid, ssize); | |
612 | hash = hpt_hash(vpn, shift, ssize); | |
613 | if (hidx & _PTEIDX_SECONDARY) | |
614 | hash = ~hash; | |
615 | ||
616 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | |
617 | slot += hidx & _PTEIDX_GROUP_IX; | |
618 | ||
619 | slot_array[index] = slot; | |
620 | vpn_array[index] = vpn; | |
621 | if (index == PPC64_HUGE_HPTE_BATCH - 1) { | |
622 | /* | |
623 | * Now do a bluk invalidate | |
624 | */ | |
625 | __pSeries_lpar_hugepage_invalidate(slot_array, | |
626 | vpn_array, | |
627 | PPC64_HUGE_HPTE_BATCH, | |
628 | psize, ssize); | |
629 | index = 0; | |
630 | } else | |
631 | index++; | |
632 | } | |
633 | if (index) | |
634 | __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array, | |
635 | index, psize, ssize); | |
636 | } | |
637 | #else | |
/*
 * Stub used when CONFIG_TRANSPARENT_HUGEPAGE is not set: the hook should
 * never be reached, so it only emits a warning naming itself.
 */
static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
					     unsigned long addr,
					     unsigned char *hpte_slot_array,
					     int psize, int ssize, int local)
{
	WARN(1, "%s called without THP support\n", __func__);
}
645 | #endif | |
646 | ||
647 | static int pSeries_lpar_hpte_removebolted(unsigned long ea, | |
648 | int psize, int ssize) | |
649 | { | |
650 | unsigned long vpn; | |
651 | unsigned long slot, vsid; | |
652 | ||
653 | vsid = get_kernel_vsid(ea, ssize); | |
654 | vpn = hpt_vpn(ea, vsid, ssize); | |
655 | ||
656 | slot = pSeries_lpar_hpte_find(vpn, psize, ssize); | |
657 | if (slot == -1) | |
658 | return -ENOENT; | |
659 | ||
660 | /* | |
661 | * lpar doesn't use the passed actual page size | |
662 | */ | |
663 | pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0); | |
664 | return 0; | |
665 | } | |
666 | ||
667 | ||
668 | static inline unsigned long compute_slot(real_pte_t pte, | |
669 | unsigned long vpn, | |
670 | unsigned long index, | |
671 | unsigned long shift, | |
672 | int ssize) | |
673 | { | |
674 | unsigned long slot, hash, hidx; | |
675 | ||
676 | hash = hpt_hash(vpn, shift, ssize); | |
677 | hidx = __rpte_to_hidx(pte, index); | |
678 | if (hidx & _PTEIDX_SECONDARY) | |
679 | hash = ~hash; | |
680 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | |
681 | slot += hidx & _PTEIDX_GROUP_IX; | |
682 | return slot; | |
683 | } | |
684 | ||
685 | /** | |
686 | * The hcall H_BLOCK_REMOVE implies that the virtual pages to processed are | |
687 | * "all within the same naturally aligned 8 page virtual address block". | |
688 | */ | |
689 | static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch, | |
690 | unsigned long *param) | |
691 | { | |
692 | unsigned long vpn; | |
693 | unsigned long i, pix = 0; | |
694 | unsigned long index, shift, slot, current_vpgb, vpgb; | |
695 | real_pte_t pte; | |
696 | int psize, ssize; | |
697 | ||
698 | psize = batch->psize; | |
699 | ssize = batch->ssize; | |
700 | ||
701 | for (i = 0; i < number; i++) { | |
702 | vpn = batch->vpn[i]; | |
703 | pte = batch->pte[i]; | |
704 | pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { | |
705 | /* | |
706 | * Shifting 3 bits more on the right to get a | |
707 | * 8 pages aligned virtual addresse. | |
708 | */ | |
709 | vpgb = (vpn >> (shift - VPN_SHIFT + 3)); | |
710 | if (!pix || vpgb != current_vpgb) { | |
711 | /* | |
712 | * Need to start a new 8 pages block, flush | |
713 | * the current one if needed. | |
714 | */ | |
715 | if (pix) | |
716 | (void)call_block_remove(pix, param, | |
717 | true); | |
718 | current_vpgb = vpgb; | |
719 | param[0] = hpte_encode_avpn(vpn, psize, | |
720 | ssize); | |
721 | pix = 1; | |
722 | } | |
723 | ||
724 | slot = compute_slot(pte, vpn, index, shift, ssize); | |
725 | param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot; | |
726 | ||
727 | if (pix == PLPAR_HCALL9_BUFSIZE) { | |
728 | pix = call_block_remove(pix, param, false); | |
729 | /* | |
730 | * pix = 0 means that all the entries were | |
731 | * removed, we can start a new block. | |
732 | * Otherwise, this means that there are entries | |
733 | * to retry, and pix points to latest one, so | |
734 | * we should increment it and try to continue | |
735 | * the same block. | |
736 | */ | |
737 | if (pix) | |
738 | pix++; | |
739 | } | |
740 | } pte_iterate_hashed_end(); | |
741 | } | |
742 | ||
743 | if (pix) | |
744 | (void)call_block_remove(pix, param, true); | |
745 | } | |
746 | ||
747 | /* | |
748 | * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie | |
749 | * lock. | |
750 | */ | |
751 | static void pSeries_lpar_flush_hash_range(unsigned long number, int local) | |
752 | { | |
753 | unsigned long vpn; | |
754 | unsigned long i, pix, rc; | |
755 | unsigned long flags = 0; | |
756 | struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch); | |
757 | int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); | |
758 | unsigned long param[PLPAR_HCALL9_BUFSIZE]; | |
759 | unsigned long index, shift, slot; | |
760 | real_pte_t pte; | |
761 | int psize, ssize; | |
762 | ||
763 | if (lock_tlbie) | |
764 | spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags); | |
765 | ||
766 | if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) { | |
767 | do_block_remove(number, batch, param); | |
768 | goto out; | |
769 | } | |
770 | ||
771 | psize = batch->psize; | |
772 | ssize = batch->ssize; | |
773 | pix = 0; | |
774 | for (i = 0; i < number; i++) { | |
775 | vpn = batch->vpn[i]; | |
776 | pte = batch->pte[i]; | |
777 | pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { | |
778 | slot = compute_slot(pte, vpn, index, shift, ssize); | |
779 | if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { | |
780 | /* | |
781 | * lpar doesn't use the passed actual page size | |
782 | */ | |
783 | pSeries_lpar_hpte_invalidate(slot, vpn, psize, | |
784 | 0, ssize, local); | |
785 | } else { | |
786 | param[pix] = HBR_REQUEST | HBR_AVPN | slot; | |
787 | param[pix+1] = hpte_encode_avpn(vpn, psize, | |
788 | ssize); | |
789 | pix += 2; | |
790 | if (pix == 8) { | |
791 | rc = plpar_hcall9(H_BULK_REMOVE, param, | |
792 | param[0], param[1], param[2], | |
793 | param[3], param[4], param[5], | |
794 | param[6], param[7]); | |
795 | BUG_ON(rc != H_SUCCESS); | |
796 | pix = 0; | |
797 | } | |
798 | } | |
799 | } pte_iterate_hashed_end(); | |
800 | } | |
801 | if (pix) { | |
802 | param[pix] = HBR_END; | |
803 | rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1], | |
804 | param[2], param[3], param[4], param[5], | |
805 | param[6], param[7]); | |
806 | BUG_ON(rc != H_SUCCESS); | |
807 | } | |
808 | ||
809 | out: | |
810 | if (lock_tlbie) | |
811 | spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); | |
812 | } | |
813 | ||
814 | static int __init disable_bulk_remove(char *str) | |
815 | { | |
816 | if (strcmp(str, "off") == 0 && | |
817 | firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { | |
818 | pr_info("Disabling BULK_REMOVE firmware feature"); | |
819 | powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE; | |
820 | } | |
821 | return 1; | |
822 | } | |
823 | ||
824 | __setup("bulk_remove=", disable_bulk_remove); | |
825 | ||
/*
 * Upper bound on the accumulated long-busy delay accepted while preparing
 * an HPT resize before the attempt is cancelled.
 */
#define HPT_RESIZE_TIMEOUT	10000 /* ms */

/*
 * Argument block shared between pseries_lpar_resize_hpt() and the commit
 * callback it runs under stop_machine().
 */
struct hpt_resize_state {
	/* Requested HPT size, as log2 of the size in bytes. */
	unsigned long shift;
	/* Status returned by the commit hcall, filled in by the callback. */
	int commit_rc;
};
832 | ||
833 | static int pseries_lpar_resize_hpt_commit(void *data) | |
834 | { | |
835 | struct hpt_resize_state *state = data; | |
836 | ||
837 | state->commit_rc = plpar_resize_hpt_commit(0, state->shift); | |
838 | if (state->commit_rc != H_SUCCESS) | |
839 | return -EIO; | |
840 | ||
841 | /* Hypervisor has transitioned the HTAB, update our globals */ | |
842 | ppc64_pft_size = state->shift; | |
843 | htab_size_bytes = 1UL << ppc64_pft_size; | |
844 | htab_hash_mask = (htab_size_bytes >> 7) - 1; | |
845 | ||
846 | return 0; | |
847 | } | |
848 | ||
849 | /* Must be called in user context */ | |
850 | static int pseries_lpar_resize_hpt(unsigned long shift) | |
851 | { | |
852 | struct hpt_resize_state state = { | |
853 | .shift = shift, | |
854 | .commit_rc = H_FUNCTION, | |
855 | }; | |
856 | unsigned int delay, total_delay = 0; | |
857 | int rc; | |
858 | ktime_t t0, t1, t2; | |
859 | ||
860 | might_sleep(); | |
861 | ||
862 | if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE)) | |
863 | return -ENODEV; | |
864 | ||
865 | pr_info("Attempting to resize HPT to shift %lu\n", shift); | |
866 | ||
867 | t0 = ktime_get(); | |
868 | ||
869 | rc = plpar_resize_hpt_prepare(0, shift); | |
870 | while (H_IS_LONG_BUSY(rc)) { | |
871 | delay = get_longbusy_msecs(rc); | |
872 | total_delay += delay; | |
873 | if (total_delay > HPT_RESIZE_TIMEOUT) { | |
874 | /* prepare with shift==0 cancels an in-progress resize */ | |
875 | rc = plpar_resize_hpt_prepare(0, 0); | |
876 | if (rc != H_SUCCESS) | |
877 | pr_warn("Unexpected error %d cancelling timed out HPT resize\n", | |
878 | rc); | |
879 | return -ETIMEDOUT; | |
880 | } | |
881 | msleep(delay); | |
882 | rc = plpar_resize_hpt_prepare(0, shift); | |
883 | }; | |
884 | ||
885 | switch (rc) { | |
886 | case H_SUCCESS: | |
887 | /* Continue on */ | |
888 | break; | |
889 | ||
890 | case H_PARAMETER: | |
891 | pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n"); | |
892 | return -EINVAL; | |
893 | case H_RESOURCE: | |
894 | pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n"); | |
895 | return -EPERM; | |
896 | default: | |
897 | pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc); | |
898 | return -EIO; | |
899 | } | |
900 | ||
901 | t1 = ktime_get(); | |
902 | ||
903 | rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL); | |
904 | ||
905 | t2 = ktime_get(); | |
906 | ||
907 | if (rc != 0) { | |
908 | switch (state.commit_rc) { | |
909 | case H_PTEG_FULL: | |
910 | return -ENOSPC; | |
911 | ||
912 | default: | |
913 | pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n", | |
914 | state.commit_rc); | |
915 | return -EIO; | |
916 | }; | |
917 | } | |
918 | ||
919 | pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n", | |
920 | shift, (long long) ktime_ms_delta(t1, t0), | |
921 | (long long) ktime_ms_delta(t2, t1)); | |
922 | ||
923 | return 0; | |
924 | } | |
925 | ||
926 | static int pseries_lpar_register_process_table(unsigned long base, | |
927 | unsigned long page_size, unsigned long table_size) | |
928 | { | |
929 | long rc; | |
930 | unsigned long flags = 0; | |
931 | ||
932 | if (table_size) | |
933 | flags |= PROC_TABLE_NEW; | |
934 | if (radix_enabled()) | |
935 | flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE; | |
936 | else | |
937 | flags |= PROC_TABLE_HPT_SLB; | |
938 | for (;;) { | |
939 | rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base, | |
940 | page_size, table_size); | |
941 | if (!H_IS_LONG_BUSY(rc)) | |
942 | break; | |
943 | mdelay(get_longbusy_msecs(rc)); | |
944 | } | |
945 | if (rc != H_SUCCESS) { | |
946 | pr_err("Failed to register process table (rc=%ld)\n", rc); | |
947 | BUG(); | |
948 | } | |
949 | return rc; | |
950 | } | |
951 | ||
952 | void __init hpte_init_pseries(void) | |
953 | { | |
954 | mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate; | |
955 | mmu_hash_ops.hpte_updatepp = pSeries_lpar_hpte_updatepp; | |
956 | mmu_hash_ops.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp; | |
957 | mmu_hash_ops.hpte_insert = pSeries_lpar_hpte_insert; | |
958 | mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove; | |
959 | mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted; | |
960 | mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range; | |
961 | mmu_hash_ops.hpte_clear_all = pseries_hpte_clear_all; | |
962 | mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate; | |
963 | register_process_table = pseries_lpar_register_process_table; | |
964 | ||
965 | if (firmware_has_feature(FW_FEATURE_HPT_RESIZE)) | |
966 | mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt; | |
967 | } | |
968 | ||
969 | void radix_init_pseries(void) | |
970 | { | |
971 | pr_info("Using radix MMU under hypervisor\n"); | |
972 | register_process_table = pseries_lpar_register_process_table; | |
973 | } | |
974 | ||
975 | #ifdef CONFIG_PPC_SMLPAR | |
976 | #define CMO_FREE_HINT_DEFAULT 1 | |
977 | static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT; | |
978 | ||
979 | static int __init cmo_free_hint(char *str) | |
980 | { | |
981 | char *parm; | |
982 | parm = strstrip(str); | |
983 | ||
984 | if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) { | |
985 | pr_info("%s: CMO free page hinting is not active.\n", __func__); | |
986 | cmo_free_hint_flag = 0; | |
987 | return 1; | |
988 | } | |
989 | ||
990 | cmo_free_hint_flag = 1; | |
991 | pr_info("%s: CMO free page hinting is active.\n", __func__); | |
992 | ||
993 | if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0) | |
994 | return 1; | |
995 | ||
996 | return 0; | |
997 | } | |
998 | ||
999 | __setup("cmo_free_hint=", cmo_free_hint); | |
1000 | ||
1001 | static void pSeries_set_page_state(struct page *page, int order, | |
1002 | unsigned long state) | |
1003 | { | |
1004 | int i, j; | |
1005 | unsigned long cmo_page_sz, addr; | |
1006 | ||
1007 | cmo_page_sz = cmo_get_page_size(); | |
1008 | addr = __pa((unsigned long)page_address(page)); | |
1009 | ||
1010 | for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) { | |
1011 | for (j = 0; j < PAGE_SIZE; j += cmo_page_sz) | |
1012 | plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0); | |
1013 | } | |
1014 | } | |
1015 | ||
1016 | void arch_free_page(struct page *page, int order) | |
1017 | { | |
1018 | if (radix_enabled()) | |
1019 | return; | |
1020 | if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO)) | |
1021 | return; | |
1022 | ||
1023 | pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED); | |
1024 | } | |
1025 | EXPORT_SYMBOL(arch_free_page); | |
1026 | ||
1027 | #endif /* CONFIG_PPC_SMLPAR */ | |
1028 | #endif /* CONFIG_PPC_BOOK3S_64 */ | |
1029 | ||
1030 | #ifdef CONFIG_TRACEPOINTS | |
1031 | #ifdef CONFIG_JUMP_LABEL | |
1032 | struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; | |
1033 | ||
1034 | int hcall_tracepoint_regfunc(void) | |
1035 | { | |
1036 | static_key_slow_inc(&hcall_tracepoint_key); | |
1037 | return 0; | |
1038 | } | |
1039 | ||
1040 | void hcall_tracepoint_unregfunc(void) | |
1041 | { | |
1042 | static_key_slow_dec(&hcall_tracepoint_key); | |
1043 | } | |
1044 | #else | |
1045 | /* | |
1046 | * We optimise our hcall path by placing hcall_tracepoint_refcount | |
1047 | * directly in the TOC so we can check if the hcall tracepoints are | |
1048 | * enabled via a single load. | |
1049 | */ | |
1050 | ||
1051 | /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ | |
1052 | extern long hcall_tracepoint_refcount; | |
1053 | ||
1054 | int hcall_tracepoint_regfunc(void) | |
1055 | { | |
1056 | hcall_tracepoint_refcount++; | |
1057 | return 0; | |
1058 | } | |
1059 | ||
1060 | void hcall_tracepoint_unregfunc(void) | |
1061 | { | |
1062 | hcall_tracepoint_refcount--; | |
1063 | } | |
1064 | #endif | |
1065 | ||
/*
 * Since the tracing code might execute hcalls we need to guard against
 * recursion. One example of this is spinlocks calling H_YIELD on
 * shared processor partitions.
 */
1071 | static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); | |
1072 | ||
1073 | ||
1074 | void __trace_hcall_entry(unsigned long opcode, unsigned long *args) | |
1075 | { | |
1076 | unsigned long flags; | |
1077 | unsigned int *depth; | |
1078 | ||
1079 | /* | |
1080 | * We cannot call tracepoints inside RCU idle regions which | |
1081 | * means we must not trace H_CEDE. | |
1082 | */ | |
1083 | if (opcode == H_CEDE) | |
1084 | return; | |
1085 | ||
1086 | local_irq_save(flags); | |
1087 | ||
1088 | depth = this_cpu_ptr(&hcall_trace_depth); | |
1089 | ||
1090 | if (*depth) | |
1091 | goto out; | |
1092 | ||
1093 | (*depth)++; | |
1094 | preempt_disable(); | |
1095 | trace_hcall_entry(opcode, args); | |
1096 | (*depth)--; | |
1097 | ||
1098 | out: | |
1099 | local_irq_restore(flags); | |
1100 | } | |
1101 | ||
1102 | void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) | |
1103 | { | |
1104 | unsigned long flags; | |
1105 | unsigned int *depth; | |
1106 | ||
1107 | if (opcode == H_CEDE) | |
1108 | return; | |
1109 | ||
1110 | local_irq_save(flags); | |
1111 | ||
1112 | depth = this_cpu_ptr(&hcall_trace_depth); | |
1113 | ||
1114 | if (*depth) | |
1115 | goto out; | |
1116 | ||
1117 | (*depth)++; | |
1118 | trace_hcall_exit(opcode, retval, retbuf); | |
1119 | preempt_enable(); | |
1120 | (*depth)--; | |
1121 | ||
1122 | out: | |
1123 | local_irq_restore(flags); | |
1124 | } | |
1125 | #endif | |
1126 | ||
1127 | /** | |
1128 | * h_get_mpp | |
1129 | * H_GET_MPP hcall returns info in 7 parms | |
1130 | */ | |
1131 | int h_get_mpp(struct hvcall_mpp_data *mpp_data) | |
1132 | { | |
1133 | int rc; | |
1134 | unsigned long retbuf[PLPAR_HCALL9_BUFSIZE]; | |
1135 | ||
1136 | rc = plpar_hcall9(H_GET_MPP, retbuf); | |
1137 | ||
1138 | mpp_data->entitled_mem = retbuf[0]; | |
1139 | mpp_data->mapped_mem = retbuf[1]; | |
1140 | ||
1141 | mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff; | |
1142 | mpp_data->pool_num = retbuf[2] & 0xffff; | |
1143 | ||
1144 | mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff; | |
1145 | mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff; | |
1146 | mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL; | |
1147 | ||
1148 | mpp_data->pool_size = retbuf[4]; | |
1149 | mpp_data->loan_request = retbuf[5]; | |
1150 | mpp_data->backing_mem = retbuf[6]; | |
1151 | ||
1152 | return rc; | |
1153 | } | |
1154 | EXPORT_SYMBOL(h_get_mpp); | |
1155 | ||
1156 | int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data) | |
1157 | { | |
1158 | int rc; | |
1159 | unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 }; | |
1160 | ||
1161 | rc = plpar_hcall9(H_GET_MPP_X, retbuf); | |
1162 | ||
1163 | mpp_x_data->coalesced_bytes = retbuf[0]; | |
1164 | mpp_x_data->pool_coalesced_bytes = retbuf[1]; | |
1165 | mpp_x_data->pool_purr_cycles = retbuf[2]; | |
1166 | mpp_x_data->pool_spurr_cycles = retbuf[3]; | |
1167 | ||
1168 | return rc; | |
1169 | } | |
1170 | ||
1171 | static unsigned long vsid_unscramble(unsigned long vsid, int ssize) | |
1172 | { | |
1173 | unsigned long protovsid; | |
1174 | unsigned long va_bits = VA_BITS; | |
1175 | unsigned long modinv, vsid_modulus; | |
1176 | unsigned long max_mod_inv, tmp_modinv; | |
1177 | ||
1178 | if (!mmu_has_feature(MMU_FTR_68_BIT_VA)) | |
1179 | va_bits = 65; | |
1180 | ||
1181 | if (ssize == MMU_SEGSIZE_256M) { | |
1182 | modinv = VSID_MULINV_256M; | |
1183 | vsid_modulus = ((1UL << (va_bits - SID_SHIFT)) - 1); | |
1184 | } else { | |
1185 | modinv = VSID_MULINV_1T; | |
1186 | vsid_modulus = ((1UL << (va_bits - SID_SHIFT_1T)) - 1); | |
1187 | } | |
1188 | ||
1189 | /* | |
1190 | * vsid outside our range. | |
1191 | */ | |
1192 | if (vsid >= vsid_modulus) | |
1193 | return 0; | |
1194 | ||
1195 | /* | |
1196 | * If modinv is the modular multiplicate inverse of (x % vsid_modulus) | |
1197 | * and vsid = (protovsid * x) % vsid_modulus, then we say: | |
1198 | * protovsid = (vsid * modinv) % vsid_modulus | |
1199 | */ | |
1200 | ||
1201 | /* Check if (vsid * modinv) overflow (63 bits) */ | |
1202 | max_mod_inv = 0x7fffffffffffffffull / vsid; | |
1203 | if (modinv < max_mod_inv) | |
1204 | return (vsid * modinv) % vsid_modulus; | |
1205 | ||
1206 | tmp_modinv = modinv/max_mod_inv; | |
1207 | modinv %= max_mod_inv; | |
1208 | ||
1209 | protovsid = (((vsid * max_mod_inv) % vsid_modulus) * tmp_modinv) % vsid_modulus; | |
1210 | protovsid = (protovsid + vsid * modinv) % vsid_modulus; | |
1211 | ||
1212 | return protovsid; | |
1213 | } | |
1214 | ||
1215 | static int __init reserve_vrma_context_id(void) | |
1216 | { | |
1217 | unsigned long protovsid; | |
1218 | ||
1219 | /* | |
1220 | * Reserve context ids which map to reserved virtual addresses. For now | |
1221 | * we only reserve the context id which maps to the VRMA VSID. We ignore | |
1222 | * the addresses in "ibm,adjunct-virtual-addresses" because we don't | |
1223 | * enable adjunct support via the "ibm,client-architecture-support" | |
1224 | * interface. | |
1225 | */ | |
1226 | protovsid = vsid_unscramble(VRMA_VSID, MMU_SEGSIZE_1T); | |
1227 | hash__reserve_context_id(protovsid >> ESID_BITS_1T); | |
1228 | return 0; | |
1229 | } | |
1230 | machine_device_initcall(pseries, reserve_vrma_context_id); | |
1231 | ||
1232 | #ifdef CONFIG_DEBUG_FS | |
1233 | /* debugfs file interface for vpa data */ | |
1234 | static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len, | |
1235 | loff_t *pos) | |
1236 | { | |
1237 | int cpu = (long)filp->private_data; | |
1238 | struct lppaca *lppaca = &lppaca_of(cpu); | |
1239 | ||
1240 | return simple_read_from_buffer(buf, len, pos, lppaca, | |
1241 | sizeof(struct lppaca)); | |
1242 | } | |
1243 | ||
1244 | static const struct file_operations vpa_fops = { | |
1245 | .open = simple_open, | |
1246 | .read = vpa_file_read, | |
1247 | .llseek = default_llseek, | |
1248 | }; | |
1249 | ||
1250 | static int __init vpa_debugfs_init(void) | |
1251 | { | |
1252 | char name[16]; | |
1253 | long i; | |
1254 | static struct dentry *vpa_dir; | |
1255 | ||
1256 | if (!firmware_has_feature(FW_FEATURE_SPLPAR)) | |
1257 | return 0; | |
1258 | ||
1259 | vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root); | |
1260 | if (!vpa_dir) { | |
1261 | pr_warn("%s: can't create vpa root dir\n", __func__); | |
1262 | return -ENOMEM; | |
1263 | } | |
1264 | ||
1265 | /* set up the per-cpu vpa file*/ | |
1266 | for_each_possible_cpu(i) { | |
1267 | struct dentry *d; | |
1268 | ||
1269 | sprintf(name, "cpu-%ld", i); | |
1270 | ||
1271 | d = debugfs_create_file(name, 0400, vpa_dir, (void *)i, | |
1272 | &vpa_fops); | |
1273 | if (!d) { | |
1274 | pr_warn("%s: can't create per-cpu vpa file\n", | |
1275 | __func__); | |
1276 | return -ENOMEM; | |
1277 | } | |
1278 | } | |
1279 | ||
1280 | return 0; | |
1281 | } | |
1282 | machine_arch_initcall(pseries, vpa_debugfs_init); | |
1283 | #endif /* CONFIG_DEBUG_FS */ |