]>
Commit | Line | Data |
---|---|---|
04864808 HB |
1 | /* |
2 | * In-kernel vector facility support functions | |
3 | * | |
4 | * Copyright IBM Corp. 2015 | |
5 | * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> | |
6 | */ | |
7 | #include <linux/kernel.h> | |
8 | #include <linux/cpu.h> | |
9 | #include <linux/sched.h> | |
10 | #include <asm/fpu/types.h> | |
11 | #include <asm/fpu/api.h> | |
12 | ||
/*
 * Per-CPU variable to maintain FPU register ranges that are in use
 * by the kernel.
 */
static DEFINE_PER_CPU(u32, kernel_fpu_state);

/*
 * Union of KERNEL_FPU_MASK and KERNEL_FPC.  It filters the caller's flags
 * before they are OR-ed into kernel_fpu_state, and it is used to test
 * whether a saved mask records any in-use state at all.
 */
#define KERNEL_FPU_STATE_MASK (KERNEL_FPU_MASK|KERNEL_FPC)
20 | ||
21 | ||
22 | void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) | |
23 | { | |
24 | if (!__this_cpu_read(kernel_fpu_state)) { | |
25 | /* | |
26 | * Save user space FPU state and register contents. Multiple | |
27 | * calls because of interruptions do not matter and return | |
28 | * immediately. This also sets CIF_FPU to lazy restore FP/VX | |
29 | * register contents when returning to user space. | |
30 | */ | |
31 | save_fpu_regs(); | |
32 | } | |
33 | ||
34 | /* Update flags to use the vector facility for KERNEL_FPR */ | |
35 | if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) { | |
36 | flags |= KERNEL_VXR_LOW | KERNEL_FPC; | |
37 | flags &= ~KERNEL_FPR; | |
38 | } | |
39 | ||
40 | /* Save and update current kernel VX state */ | |
41 | state->mask = __this_cpu_read(kernel_fpu_state); | |
42 | __this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK); | |
43 | ||
44 | /* | |
45 | * If this is the first call to __kernel_fpu_begin(), no additional | |
46 | * work is required. | |
47 | */ | |
48 | if (!(state->mask & KERNEL_FPU_STATE_MASK)) | |
49 | return; | |
50 | ||
51 | /* | |
52 | * If KERNEL_FPR is still set, the vector facility is not available | |
53 | * and, thus, save floating-point control and registers only. | |
54 | */ | |
55 | if (state->mask & KERNEL_FPR) { | |
56 | asm volatile("stfpc %0" : "=Q" (state->fpc)); | |
57 | asm volatile("std 0,%0" : "=Q" (state->fprs[0])); | |
58 | asm volatile("std 1,%0" : "=Q" (state->fprs[1])); | |
59 | asm volatile("std 2,%0" : "=Q" (state->fprs[2])); | |
60 | asm volatile("std 3,%0" : "=Q" (state->fprs[3])); | |
61 | asm volatile("std 4,%0" : "=Q" (state->fprs[4])); | |
62 | asm volatile("std 5,%0" : "=Q" (state->fprs[5])); | |
63 | asm volatile("std 6,%0" : "=Q" (state->fprs[6])); | |
64 | asm volatile("std 7,%0" : "=Q" (state->fprs[7])); | |
65 | asm volatile("std 8,%0" : "=Q" (state->fprs[8])); | |
66 | asm volatile("std 9,%0" : "=Q" (state->fprs[9])); | |
67 | asm volatile("std 10,%0" : "=Q" (state->fprs[10])); | |
68 | asm volatile("std 11,%0" : "=Q" (state->fprs[11])); | |
69 | asm volatile("std 12,%0" : "=Q" (state->fprs[12])); | |
70 | asm volatile("std 13,%0" : "=Q" (state->fprs[13])); | |
71 | asm volatile("std 14,%0" : "=Q" (state->fprs[14])); | |
72 | asm volatile("std 15,%0" : "=Q" (state->fprs[15])); | |
73 | return; | |
74 | } | |
75 | ||
76 | /* | |
77 | * If this is a nested call to __kernel_fpu_begin(), check the saved | |
78 | * state mask to save and later restore the vector registers that | |
79 | * are already in use. Let's start with checking floating-point | |
80 | * controls. | |
81 | */ | |
82 | if (state->mask & KERNEL_FPC) | |
83 | asm volatile("stfpc %0" : "=m" (state->fpc)); | |
84 | ||
85 | /* Test and save vector registers */ | |
86 | asm volatile ( | |
87 | /* | |
88 | * Test if any vector register must be saved and, if so, | |
89 | * test if all register can be saved. | |
90 | */ | |
91 | " tmll %[m],15\n" /* KERNEL_VXR_MASK */ | |
92 | " jz 20f\n" /* no work -> done */ | |
93 | " la 1,%[vxrs]\n" /* load save area */ | |
94 | " jo 18f\n" /* -> save V0..V31 */ | |
95 | ||
96 | /* | |
97 | * Test if V8..V23 can be saved at once... this speeds up | |
98 | * for KERNEL_fpu_MID only. Otherwise continue to split the | |
99 | * range of vector registers into two halves and test them | |
100 | * separately. | |
101 | */ | |
102 | " tmll %[m],6\n" /* KERNEL_VXR_MID */ | |
103 | " jo 17f\n" /* -> save V8..V23 */ | |
104 | ||
105 | /* Test and save the first half of 16 vector registers */ | |
106 | "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ | |
107 | " jz 10f\n" /* -> KERNEL_VXR_HIGH */ | |
108 | " jo 2f\n" /* 11 -> save V0..V15 */ | |
109 | " brc 4,3f\n" /* 01 -> save V0..V7 */ | |
110 | " brc 2,4f\n" /* 10 -> save V8..V15 */ | |
111 | ||
112 | /* Test and save the second half of 16 vector registers */ | |
113 | "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ | |
114 | " jo 19f\n" /* 11 -> save V16..V31 */ | |
115 | " brc 4,11f\n" /* 01 -> save V16..V23 */ | |
116 | " brc 2,12f\n" /* 10 -> save V24..V31 */ | |
117 | " j 20f\n" /* 00 -> done */ | |
118 | ||
119 | /* | |
120 | * Below are the vstm combinations to save multiple vector | |
121 | * registers at once. | |
122 | */ | |
123 | "2: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ | |
124 | " j 10b\n" /* -> VXR_HIGH */ | |
125 | "3: .word 0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */ | |
126 | " j 10b\n" /* -> VXR_HIGH */ | |
127 | "4: .word 0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */ | |
128 | " j 10b\n" /* -> VXR_HIGH */ | |
129 | "\n" | |
130 | "11: .word 0xe707,0x1100,0x0c3e\n" /* vstm 16,23,256(1) */ | |
131 | " j 20f\n" /* -> done */ | |
132 | "12: .word 0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */ | |
133 | " j 20f\n" /* -> done */ | |
134 | "\n" | |
135 | "17: .word 0xe787,0x1080,0x043e\n" /* vstm 8,23,128(1) */ | |
136 | " nill %[m],249\n" /* m &= ~VXR_MID */ | |
137 | " j 1b\n" /* -> VXR_LOW */ | |
138 | "\n" | |
139 | "18: .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */ | |
140 | "19: .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */ | |
141 | "20:" | |
142 | : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) | |
143 | : [m] "d" (state->mask) | |
144 | : "1", "cc"); | |
145 | } | |
146 | EXPORT_SYMBOL(__kernel_fpu_begin); | |
147 | ||
148 | void __kernel_fpu_end(struct kernel_fpu *state) | |
149 | { | |
150 | /* Just update the per-CPU state if there is nothing to restore */ | |
151 | if (!(state->mask & KERNEL_FPU_STATE_MASK)) | |
152 | goto update_fpu_state; | |
153 | ||
154 | /* | |
155 | * If KERNEL_FPR is specified, the vector facility is not available | |
156 | * and, thus, restore floating-point control and registers only. | |
157 | */ | |
158 | if (state->mask & KERNEL_FPR) { | |
159 | asm volatile("lfpc %0" : : "Q" (state->fpc)); | |
160 | asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); | |
161 | asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); | |
162 | asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); | |
163 | asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); | |
164 | asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); | |
165 | asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); | |
166 | asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); | |
167 | asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); | |
168 | asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); | |
169 | asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); | |
170 | asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); | |
171 | asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); | |
172 | asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); | |
173 | asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); | |
174 | asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); | |
175 | asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); | |
176 | goto update_fpu_state; | |
177 | } | |
178 | ||
179 | /* Test and restore floating-point controls */ | |
180 | if (state->mask & KERNEL_FPC) | |
181 | asm volatile("lfpc %0" : : "Q" (state->fpc)); | |
182 | ||
183 | /* Test and restore (load) vector registers */ | |
184 | asm volatile ( | |
185 | /* | |
186 | * Test if any vector registers must be loaded and, if so, | |
187 | * test if all registers can be loaded at once. | |
188 | */ | |
189 | " tmll %[m],15\n" /* KERNEL_VXR_MASK */ | |
190 | " jz 20f\n" /* no work -> done */ | |
191 | " la 1,%[vxrs]\n" /* load load area */ | |
192 | " jo 18f\n" /* -> load V0..V31 */ | |
193 | ||
194 | /* | |
195 | * Test if V8..V23 can be restored at once... this speeds up | |
196 | * for KERNEL_VXR_MID only. Otherwise continue to split the | |
197 | * range of vector registers into two halves and test them | |
198 | * separately. | |
199 | */ | |
200 | " tmll %[m],6\n" /* KERNEL_VXR_MID */ | |
201 | " jo 17f\n" /* -> load V8..V23 */ | |
202 | ||
203 | /* Test and load the first half of 16 vector registers */ | |
204 | "1: tmll %[m],3\n" /* KERNEL_VXR_LOW */ | |
205 | " jz 10f\n" /* -> KERNEL_VXR_HIGH */ | |
206 | " jo 2f\n" /* 11 -> load V0..V15 */ | |
207 | " brc 4,3f\n" /* 01 -> load V0..V7 */ | |
208 | " brc 2,4f\n" /* 10 -> load V8..V15 */ | |
209 | ||
210 | /* Test and load the second half of 16 vector registers */ | |
211 | "10: tmll %[m],12\n" /* KERNEL_VXR_HIGH */ | |
212 | " jo 19f\n" /* 11 -> load V16..V31 */ | |
213 | " brc 4,11f\n" /* 01 -> load V16..V23 */ | |
214 | " brc 2,12f\n" /* 10 -> load V24..V31 */ | |
215 | " j 20f\n" /* 00 -> done */ | |
216 | ||
217 | /* | |
218 | * Below are the vstm combinations to load multiple vector | |
219 | * registers at once. | |
220 | */ | |
221 | "2: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ | |
222 | " j 10b\n" /* -> VXR_HIGH */ | |
223 | "3: .word 0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */ | |
224 | " j 10b\n" /* -> VXR_HIGH */ | |
225 | "4: .word 0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */ | |
226 | " j 10b\n" /* -> VXR_HIGH */ | |
227 | "\n" | |
228 | "11: .word 0xe707,0x1100,0x0c36\n" /* vlm 16,23,256(1) */ | |
229 | " j 20f\n" /* -> done */ | |
230 | "12: .word 0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */ | |
231 | " j 20f\n" /* -> done */ | |
232 | "\n" | |
233 | "17: .word 0xe787,0x1080,0x0436\n" /* vlm 8,23,128(1) */ | |
234 | " nill %[m],249\n" /* m &= ~VXR_MID */ | |
235 | " j 1b\n" /* -> VXR_LOW */ | |
236 | "\n" | |
237 | "18: .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */ | |
238 | "19: .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */ | |
239 | "20:" | |
240 | : | |
241 | : [vxrs] "Q" (*(struct vx_array *) &state->vxrs), | |
242 | [m] "d" (state->mask) | |
243 | : "1", "cc"); | |
244 | ||
245 | update_fpu_state: | |
246 | /* Update current kernel VX state */ | |
247 | __this_cpu_write(kernel_fpu_state, state->mask); | |
248 | } | |
249 | EXPORT_SYMBOL(__kernel_fpu_end); |