// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2013, Michael (Ellerman|Neuling), IBM Corporation.
 */

#define pr_fmt(fmt)	"powernv: " fmt

#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/device.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>

#include <asm/cputhreads.h>
#include <asm/cpuidle.h>
#include <asm/kvm_ppc.h>
#include <asm/machdep.h>
#include <asm/opal.h>
#include <asm/smp.h>

#include "subcore.h"
#include "powernv.h"


/*
 * Split/unsplit procedure:
 *
 * A core can be in one of three states: unsplit, 2-way split, and 4-way split.
 *
 * The mapping to subcores_per_core is simple:
 *
 *  State       | subcores_per_core
 *  ------------|------------------
 *  Unsplit     |         1
 *  2-way split |         2
 *  4-way split |         4
 *
 * The core is split along thread boundaries; the mapping between subcores and
 * threads is as follows:
 *
 *  Unsplit:
 *          ----------------------------
 *  Subcore |            0             |
 *          ----------------------------
 *  Thread  |  0  1  2  3  4  5  6  7  |
 *          ----------------------------
 *
 *  2-way split:
 *          -------------------------------------
 *  Subcore |        0        |        1        |
 *          -------------------------------------
 *  Thread  |  0  1  2  3     |  4  5  6  7     |
 *          -------------------------------------
 *
 *  4-way split:
 *          -----------------------------------------
 *  Subcore |    0    |    1    |    2    |    3    |
 *          -----------------------------------------
 *  Thread  |  0  1   |  2  3   |  4  5   |  6  7   |
 *          -----------------------------------------
 *
 *
 * Transitions
 * -----------
 *
 * It is not possible to transition directly between the two split states;
 * the core must first be unsplit. The legal transitions are:
 *
 *  -----------          ---------------
 *  |         | <------> | 2-way split |
 *  |         |          ---------------
 *  | Unsplit |
 *  |         |          ---------------
 *  |         | <------> | 4-way split |
 *  -----------          ---------------
 *
 * Unsplitting
 * -----------
 *
 * Unsplitting is the simpler procedure. It requires thread 0 to request the
 * unsplit while all other threads NAP.
 *
 * Thread 0 clears HID0_POWER8_DYNLPARDIS (Dynamic LPAR Disable). This tells
 * the hardware that if all threads except 0 are napping, it should unsplit
 * the core.
 *
 * Non-zero threads are sent to a NAP loop; they don't exit the loop until
 * they see the core unsplit.
 *
 * Thread 0 spins waiting for the hardware to see all the other threads
 * napping and perform the unsplit.
 *
 * Once thread 0 sees the unsplit, it IPIs the secondary threads to wake them
 * out of NAP. They will then see the core unsplit and exit the NAP loop.
 *
 * Splitting
 * ---------
 *
 * The basic splitting procedure is fairly straightforward. However it is
 * complicated by the fact that after the split occurs, the newly created
 * subcores are not in a fully initialised state.
 *
 * Most notably the subcores do not have the correct value for SDR1, which
 * means they must not be running in virtual mode when the split occurs. The
 * subcores have separate timebase SPRs, but these are pre-synchronised by
 * OPAL.
 *
 * To begin with, secondary threads are sent to an assembly routine. There
 * they switch to real mode, so they are immune to the uninitialised SDR1
 * value. Once in real mode they indicate that they are in real mode, and
 * spin waiting to see the core split.
 *
 * Thread 0 waits to see that all secondaries are in real mode, and then
 * begins the splitting procedure. It first sets HID0_POWER8_DYNLPARDIS,
 * which prevents the hardware from unsplitting. Then it sets the appropriate
 * HID bit to request the split, and spins waiting to see that the split has
 * happened.
 *
 * Concurrently the secondaries will notice the split. When they do, they set
 * up their SPRs, notably SDR1, and then they can return to virtual mode and
 * exit the procedure.
 */
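/*
 * Usage sketch (a note, not from the original source; the exact sysfs path
 * follows from the subcores_per_core attribute registered on
 * cpu_subsys.dev_root in subcore_init() below):
 *
 *   # cat /sys/devices/system/cpu/subcores_per_core
 *   1
 *   # echo 2 > /sys/devices/system/cpu/subcores_per_core    (2-way split)
 *   # echo 1 > /sys/devices/system/cpu/subcores_per_core    (unsplit)
 *
 * Writes of anything other than 1, 2 or 4 fail with -EINVAL, and any change
 * fails with -EBUSY while KVM HV is active (see set_subcores_per_core()).
 */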

/* Initialised at boot by subcore_init() */
static int subcores_per_core;

/*
 * Used to communicate to offline cpus that we want them to pop out of the
 * offline loop and do a split or unsplit.
 *
 * 0 - no split happening
 * 1 - unsplit in progress
 * 2 - split to 2 in progress
 * 4 - split to 4 in progress
 */
static int new_split_mode;

static cpumask_var_t cpu_offline_mask;

struct split_state {
	u8 step;
	u8 master;
};

static DEFINE_PER_CPU(struct split_state, split_state);

static void wait_for_sync_step(int step)
{
	int i, cpu = smp_processor_id();

	for (i = cpu + 1; i < cpu + threads_per_core; i++)
		while (per_cpu(split_state, i).step < step)
			barrier();

	/* Order the wait loop vs any subsequent loads/stores. */
	mb();
}
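/*
 * Note (not from the original source): the SYNC_STEP_* values (defined in
 * subcore.h) form a monotonic ladder through which each thread's
 * split_state.step advances, roughly:
 *
 *   SYNC_STEP_INITIAL -> SYNC_STEP_UNSPLIT or SYNC_STEP_REAL_MODE
 *                     -> SYNC_STEP_FINISHED
 *
 * wait_for_sync_step() above relies on that monotonicity: thread 0 simply
 * waits for every sibling thread's step to reach at least the given value.
 */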

static void update_hid_in_slw(u64 hid0)
{
	u64 idle_states = pnv_get_supported_cpuidle_states();

	if (idle_states & OPAL_PM_WINKLE_ENABLED) {
		/* OPAL call to patch slw with the new HID0 value */
		u64 cpu_pir = hard_smp_processor_id();

		opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
	}
}
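/*
 * Rationale (a note, not from the original source): HID0 is lost across the
 * deepest idle state (winkle), and the SLW firmware image restores SPRs on
 * wakeup. If the new HID0 value were not patched into SLW via OPAL here, a
 * thread waking from winkle could have a stale split mode restored.
 */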

static void unsplit_core(void)
{
	u64 hid0, mask;
	int i, cpu;

	mask = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;

	cpu = smp_processor_id();
	if (cpu_thread_in_core(cpu) != 0) {
		while (mfspr(SPRN_HID0) & mask)
			power7_idle_type(PNV_THREAD_NAP);

		per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
		return;
	}

	hid0 = mfspr(SPRN_HID0);
	hid0 &= ~HID0_POWER8_DYNLPARDIS;
	update_power8_hid0(hid0);
	update_hid_in_slw(hid0);

	while (mfspr(SPRN_HID0) & mask)
		cpu_relax();

	/* Wake secondaries out of NAP */
	for (i = cpu + 1; i < cpu + threads_per_core; i++)
		smp_send_reschedule(i);

	wait_for_sync_step(SYNC_STEP_UNSPLIT);
}

static void split_core(int new_mode)
{
	struct { u64 value; u64 mask; } split_parms[2] = {
		{ HID0_POWER8_1TO2LPAR, HID0_POWER8_2LPARMODE },
		{ HID0_POWER8_1TO4LPAR, HID0_POWER8_4LPARMODE }
	};
	int i, cpu;
	u64 hid0;

	/* Convert new_mode (2 or 4) into an index into our parms array */
	i = (new_mode >> 1) - 1;
	BUG_ON(i < 0 || i > 1);

	cpu = smp_processor_id();
	if (cpu_thread_in_core(cpu) != 0) {
		split_core_secondary_loop(&per_cpu(split_state, cpu).step);
		return;
	}

	wait_for_sync_step(SYNC_STEP_REAL_MODE);

	/* Write new mode */
	hid0 = mfspr(SPRN_HID0);
	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
	update_power8_hid0(hid0);
	update_hid_in_slw(hid0);

	/* Wait for it to happen */
	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
		cpu_relax();
}
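/*
 * Worked example for the index conversion in split_core() above:
 * new_mode == 2 gives i = (2 >> 1) - 1 = 0, selecting HID0_POWER8_1TO2LPAR;
 * new_mode == 4 gives i = (4 >> 1) - 1 = 1, selecting HID0_POWER8_1TO4LPAR.
 * Other values (caught by the BUG_ON) can't occur given the checks in
 * set_subcores_per_core() and cpu_do_split().
 */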

static void cpu_do_split(int new_mode)
{
	/*
	 * At boot subcores_per_core will be 0, so we will always unsplit at
	 * boot. In the usual case where the core is already unsplit it's a
	 * nop, and this just ensures the kernel's notion of the mode is
	 * consistent with the hardware.
	 */
	if (subcores_per_core != 1)
		unsplit_core();

	if (new_mode != 1)
		split_core(new_mode);

	mb();
	per_cpu(split_state, smp_processor_id()).step = SYNC_STEP_FINISHED;
}

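/*
 * Entry point for offline cpus. As described in the comment above
 * new_split_mode, offline cpus poll this from their offline loop; if a
 * split or unsplit is in progress, they perform this thread's part of it
 * before returning to the loop.
 */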
bool cpu_core_split_required(void)
{
	smp_rmb();

	if (!new_split_mode)
		return false;

	cpu_do_split(new_split_mode);

	return true;
}

void update_subcore_sibling_mask(void)
{
	int cpu;
	/*
	 * sibling mask for the first cpu. Left shift this by required bits
	 * to get sibling mask for the rest of the cpus.
	 */
	int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;

	for_each_possible_cpu(cpu) {
		int tid = cpu_thread_in_core(cpu);
		int offset = (tid / threads_per_subcore) * threads_per_subcore;
		int mask = sibling_mask_first_cpu << offset;

		paca_ptrs[cpu]->subcore_sibling_mask = mask;
	}
}
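/*
 * Worked example (illustrative): with threads_per_core = 8 and a 2-way
 * split, threads_per_subcore = 4 and sibling_mask_first_cpu = 0x0f. A cpu
 * with tid = 5 gets offset = (5 / 4) * 4 = 4, so its subcore_sibling_mask
 * is 0x0f << 4 = 0xf0, i.e. threads 4-7 of the core.
 */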

static int cpu_update_split_mode(void *data)
{
	int cpu, new_mode = *(int *)data;

	if (this_cpu_ptr(&split_state)->master) {
		new_split_mode = new_mode;
		smp_wmb();

		cpumask_andnot(cpu_offline_mask, cpu_present_mask,
			       cpu_online_mask);

		/* This should work even though the cpu is offline */
		for_each_cpu(cpu, cpu_offline_mask)
			smp_send_reschedule(cpu);
	}

	cpu_do_split(new_mode);

	if (this_cpu_ptr(&split_state)->master) {
		/* Wait for all cpus to finish before we touch subcores_per_core */
		for_each_present_cpu(cpu) {
			if (cpu >= setup_max_cpus)
				break;

			while (per_cpu(split_state, cpu).step < SYNC_STEP_FINISHED)
				barrier();
		}

		new_split_mode = 0;

		/* Make the new mode public */
		subcores_per_core = new_mode;
		threads_per_subcore = threads_per_core / subcores_per_core;
		update_subcore_sibling_mask();

		/* Make sure the new mode is written before we exit */
		mb();
	}

	return 0;
}
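/*
 * Note (not from the original source): cpu_update_split_mode() above is the
 * stop_machine() callback. It runs on every online cpu simultaneously, with
 * exactly one cpu (the caller of set_subcores_per_core() below) marked as
 * master beforehand. The master kicks the offline cpus, every cpu does its
 * part of the split/unsplit, and the master then publishes the new mode.
 */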

static int set_subcores_per_core(int new_mode)
{
	struct split_state *state;
	int cpu;

	if (kvm_hv_mode_active()) {
		pr_err("Unable to change split core mode while KVM active.\n");
		return -EBUSY;
	}

	/*
	 * We are only called at boot, or from the sysfs write. If that ever
	 * changes we'll need a lock here.
	 */
	BUG_ON(new_mode < 1 || new_mode > 4 || new_mode == 3);

	for_each_present_cpu(cpu) {
		state = &per_cpu(split_state, cpu);
		state->step = SYNC_STEP_INITIAL;
		state->master = 0;
	}

	cpus_read_lock();

	/* This cpu will update the globals before exiting stop machine */
	this_cpu_ptr(&split_state)->master = 1;

	/* Ensure state is consistent before we call the other cpus */
	mb();

	stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
				cpu_online_mask);

	cpus_read_unlock();

	return 0;
}

static ssize_t __used store_subcores_per_core(struct device *dev,
		struct device_attribute *attr, const char *buf,
		size_t count)
{
	unsigned long val;
	int rc;

	/* We are serialised by the attribute lock */

	rc = sscanf(buf, "%lx", &val);
	if (rc != 1)
		return -EINVAL;

	switch (val) {
	case 1:
	case 2:
	case 4:
		if (subcores_per_core == val)
			/* Nothing to do */
			goto out;
		break;
	default:
		return -EINVAL;
	}

	rc = set_subcores_per_core(val);
	if (rc)
		return rc;

out:
	return count;
}
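/*
 * Note (not from the original source): the value above is parsed with %lx,
 * i.e. as hexadecimal. For the legal values 1, 2 and 4 this is
 * indistinguishable from decimal, and show_subcores_per_core() below prints
 * with %x to match.
 */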

static ssize_t show_subcores_per_core(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%x\n", subcores_per_core);
}

static DEVICE_ATTR(subcores_per_core, 0644,
		show_subcores_per_core, store_subcores_per_core);

static int subcore_init(void)
{
	unsigned int pvr_ver;

	pvr_ver = PVR_VER(mfspr(SPRN_PVR));

	if (pvr_ver != PVR_POWER8 &&
	    pvr_ver != PVR_POWER8E &&
	    pvr_ver != PVR_POWER8NVL)
		return 0;

	/*
	 * We need all threads in a core to be present to split/unsplit so
	 * continue only if max_cpus are aligned to threads_per_core.
	 */
	if (setup_max_cpus % threads_per_core)
		return 0;

	BUG_ON(!alloc_cpumask_var(&cpu_offline_mask, GFP_KERNEL));

	set_subcores_per_core(1);

	return device_create_file(cpu_subsys.dev_root,
				  &dev_attr_subcores_per_core);
}
machine_device_initcall(powernv, subcore_init);