#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
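/*
 * C-level view (a sketch; the prototype below is an assumption based on
 * how r3 is used here, not something declared in this file):
 *
 *	void load_vr_state(struct thread_vr_state *vr);	// r3 = vr
 *	void store_vr_state(struct thread_vr_state *vr);	// r3 = vr
 *
 * VRSTATE_VSCR is the offset of the saved VSCR image inside that buffer,
 * so the "lvx v0,r4,r3" below fetches the saved VSCR while REST_32VRS
 * reloads v0-v31 from the start of the buffer.
 */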
_GLOBAL(load_vr_state)
	li	r4,VRSTATE_VSCR
	lvx	v0,r4,r3
	mtvscr	v0
	REST_32VRS(0,r4,r3)
	blr

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
	SAVE_32VRS(0, r4, r3)
	mfvscr	v0
	li	r4, VRSTATE_VSCR
	stvx	v0, r4, r3
	blr

/*
 * load_up_altivec:
 * Enable VMX and load the current task's vector state (including VSCR)
 * from its thread_struct, leaving VMX enabled for the task on return
 * from the exception.
 * On SMP we know the VMX state is free, since we give it up on every
 * context switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
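/*
 * Register conventions assumed here (a reading of the code below, not
 * documented in this file): on 64-bit, r12 holds the MSR of the
 * interrupted context and r13 the PACA, and the image stored at _MSR(r1)
 * is what the exception exit path restores; on 32-bit, r9 holds the MSR
 * image that fast_exception_return puts back.  Setting MSR_VEC in these
 * images is what leaves VMX enabled for the task after the exception.
 */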
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

	/*
	 * While userspace in general ignores VRSAVE, glibc uses it as a boolean
	 * to optimise userspace context save/restore. Whenever we take an
	 * altivec unavailable exception we must set VRSAVE to something non
	 * zero. Set it to all 1s. See also the programming note in the ISA.
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpwi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
	/* Don't care if r4 overflows, this is desired behaviour */
	lbz	r4,THREAD_LOAD_VEC(r5)
	addi	r4,r4,1
	stb	r4,THREAD_LOAD_VEC(r5)
	addi	r6,r5,THREAD_VRSTATE
	li	r4,1
	li	r10,VRSTATE_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	v0,r10,r6
	mtvscr	v0
	REST_32VRS(0,r4,r6)
	/* restore registers and return */
	blr

/*
 * save_altivec(tsk)
 * Save tsk's vector registers into its thread_struct.
 */
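/*
 * Rough C equivalent of the destination selection below (a sketch; the
 * field names follow the usual thread_struct layout and are assumptions):
 *
 *	struct thread_vr_state *dst = tsk->thread.vr_save_area;
 *	if (!dst)
 *		dst = &tsk->thread.vr_state;
 *	// v0-v31 and VSCR are then stored into *dst
 */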
_GLOBAL(save_altivec)
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r7,0
	bne	2f
	addi	r7,r3,THREAD_VRSTATE
2:	SAVE_32VRS(0,r4,r7)
	mfvscr	v0
	li	r4,VRSTATE_VSCR
	stvx	v0,r4,r7
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Enable VSX for the current task.  The FP and VMX state is loaded
 * first (via load_up_fpu/load_up_altivec) if that has not been done
 * already.
 */
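/*
 * andi./andis. set CR0 "eq" when the tested MSR bit is clear, and beql+
 * then branches and links, so load_up_fpu/load_up_altivec are only
 * called when that state has not been loaded yet; each returns here
 * via the link register.
 */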
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */

	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
	b	fast_exception_return

#endif /* CONFIG_VSX */


/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
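/*
 * Each routine below takes pointers in r3 (destination) and r4/r5/r6
 * (sources) to 16-byte arrays of four single-precision floats, and
 * emulates one AltiVec floating-point operation with scalar FP
 * instructions, one element at a time (a reading of the 4-iteration,
 * 4-byte-stride loops below, rather than documented behaviour).
 */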
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
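/*
 * Frame layout used by fpenable/fpdisable (as read from the code below):
 * a 64-byte stack frame is created, fr0 is saved at 24(r1), fr1 at 16(r1)
 * and fr31 at 8(r1); callers may stash extra FPRs at 32(r1)..56(r1).
 * r10 holds the original MSR, r12 the caller's LR, and fr31 the original
 * FPSCR, so fpdisable can restore all of them before returning.
 */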
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr

fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
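/*
 * Roughly, in C (a sketch of the loop below; r3 = dst, r4 = a, r5 = b;
 * the parameter names are illustrative only):
 *
 *	void vaddfp(float dst[4], const float a[4], const float b[4])
 *	{
 *		int i;
 *		for (i = 0; i < 4; i++)
 *			dst[i] = a[i] + b[i];
 *	}
 */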
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
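/*
 * Operand order (as read from the fmadds below, with r3 = dst, r4 = a,
 * r5 = b, r6 = c): each element is computed as dst[i] = a[i] * c[i] + b[i],
 * i.e. the second source argument is the addend, matching AltiVec's
 * vmaddfp vD,vA,vC,vB form.
 */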
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
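/*
 * The Newton-Raphson step used below: if r estimates 1/sqrt(s), then
 *
 *	r' = r + 0.5 * r * (1 - s * r * r)
 *
 * is a better estimate; each iteration roughly doubles the number of
 * correct bits, and the code applies the step twice per element.
 */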
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable