]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blob - arch/ppc/kernel/vector.S
Linux-2.6.12-rc2
[mirror_ubuntu-focal-kernel.git] / arch / ppc / kernel / vector.S
1 #include <asm/ppc_asm.h>
2 #include <asm/processor.h>
3
4 /*
5 * The routines below are in assembler so we can closely control the
6 * usage of floating-point registers. These routines must be called
7 * with preempt disabled.
8 */
9 .data
10 fpzero:
11 .long 0
12 fpone:
13 .long 0x3f800000 /* 1.0 in single-precision FP */
14 fphalf:
15 .long 0x3f000000 /* 0.5 in single-precision FP */
16
17 .text
18 /*
19 * Internal routine to enable floating point and set FPSCR to 0.
20 * Don't call it from C; it doesn't use the normal calling convention.
21 */
22 fpenable:
23 mfmsr r10
24 ori r11,r10,MSR_FP
25 mtmsr r11
26 isync
27 stfd fr0,24(r1)
28 stfd fr1,16(r1)
29 stfd fr31,8(r1)
30 lis r11,fpzero@ha
31 mffs fr31
32 lfs fr1,fpzero@l(r11)
33 mtfsf 0xff,fr1
34 blr
35
36 fpdisable:
37 mtfsf 0xff,fr31
38 lfd fr31,8(r1)
39 lfd fr1,16(r1)
40 lfd fr0,24(r1)
41 mtmsr r10
42 isync
43 blr
44
45 /*
46 * Vector add, floating point.
47 */
48 .globl vaddfp
49 vaddfp:
50 stwu r1,-32(r1)
51 mflr r0
52 stw r0,36(r1)
53 bl fpenable
54 li r0,4
55 mtctr r0
56 li r6,0
57 1: lfsx fr0,r4,r6
58 lfsx fr1,r5,r6
59 fadds fr0,fr0,fr1
60 stfsx fr0,r3,r6
61 addi r6,r6,4
62 bdnz 1b
63 bl fpdisable
64 lwz r0,36(r1)
65 mtlr r0
66 addi r1,r1,32
67 blr
68
69 /*
70 * Vector subtract, floating point.
71 */
72 .globl vsubfp
73 vsubfp:
74 stwu r1,-32(r1)
75 mflr r0
76 stw r0,36(r1)
77 bl fpenable
78 li r0,4
79 mtctr r0
80 li r6,0
81 1: lfsx fr0,r4,r6
82 lfsx fr1,r5,r6
83 fsubs fr0,fr0,fr1
84 stfsx fr0,r3,r6
85 addi r6,r6,4
86 bdnz 1b
87 bl fpdisable
88 lwz r0,36(r1)
89 mtlr r0
90 addi r1,r1,32
91 blr
92
93 /*
94 * Vector multiply and add, floating point.
95 */
96 .globl vmaddfp
97 vmaddfp:
98 stwu r1,-48(r1)
99 mflr r0
100 stw r0,52(r1)
101 bl fpenable
102 stfd fr2,32(r1)
103 li r0,4
104 mtctr r0
105 li r7,0
106 1: lfsx fr0,r4,r7
107 lfsx fr1,r5,r7
108 lfsx fr2,r6,r7
109 fmadds fr0,fr0,fr2,fr1
110 stfsx fr0,r3,r7
111 addi r7,r7,4
112 bdnz 1b
113 lfd fr2,32(r1)
114 bl fpdisable
115 lwz r0,52(r1)
116 mtlr r0
117 addi r1,r1,48
118 blr
119
120 /*
121 * Vector negative multiply and subtract, floating point.
122 */
123 .globl vnmsubfp
124 vnmsubfp:
125 stwu r1,-48(r1)
126 mflr r0
127 stw r0,52(r1)
128 bl fpenable
129 stfd fr2,32(r1)
130 li r0,4
131 mtctr r0
132 li r7,0
133 1: lfsx fr0,r4,r7
134 lfsx fr1,r5,r7
135 lfsx fr2,r6,r7
136 fnmsubs fr0,fr0,fr2,fr1
137 stfsx fr0,r3,r7
138 addi r7,r7,4
139 bdnz 1b
140 lfd fr2,32(r1)
141 bl fpdisable
142 lwz r0,52(r1)
143 mtlr r0
144 addi r1,r1,48
145 blr
146
147 /*
148 * Vector reciprocal estimate. We just compute 1.0/x.
149 * r3 -> destination, r4 -> source.
150 */
151 .globl vrefp
152 vrefp:
153 stwu r1,-32(r1)
154 mflr r0
155 stw r0,36(r1)
156 bl fpenable
157 lis r9,fpone@ha
158 li r0,4
159 lfs fr1,fpone@l(r9)
160 mtctr r0
161 li r6,0
162 1: lfsx fr0,r4,r6
163 fdivs fr0,fr1,fr0
164 stfsx fr0,r3,r6
165 addi r6,r6,4
166 bdnz 1b
167 bl fpdisable
168 lwz r0,36(r1)
169 mtlr r0
170 addi r1,r1,32
171 blr
172
173 /*
174 * Vector reciprocal square-root estimate, floating point.
175 * We use the frsqrte instruction for the initial estimate followed
176 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
177 * r3 -> destination, r4 -> source.
178 */
179 .globl vrsqrtefp
180 vrsqrtefp:
181 stwu r1,-48(r1)
182 mflr r0
183 stw r0,52(r1)
184 bl fpenable
185 stfd fr2,32(r1)
186 stfd fr3,40(r1)
187 stfd fr4,48(r1)
188 stfd fr5,56(r1)
189 lis r9,fpone@ha
190 lis r8,fphalf@ha
191 li r0,4
192 lfs fr4,fpone@l(r9)
193 lfs fr5,fphalf@l(r8)
194 mtctr r0
195 li r6,0
196 1: lfsx fr0,r4,r6
197 frsqrte fr1,fr0 /* r = frsqrte(s) */
198 fmuls fr3,fr1,fr0 /* r * s */
199 fmuls fr2,fr1,fr5 /* r * 0.5 */
200 fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
201 fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
202 fmuls fr3,fr1,fr0 /* r * s */
203 fmuls fr2,fr1,fr5 /* r * 0.5 */
204 fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
205 fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
206 stfsx fr1,r3,r6
207 addi r6,r6,4
208 bdnz 1b
209 lfd fr5,56(r1)
210 lfd fr4,48(r1)
211 lfd fr3,40(r1)
212 lfd fr2,32(r1)
213 bl fpdisable
214 lwz r0,36(r1)
215 mtlr r0
216 addi r1,r1,32
217 blr