/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "cpu.h"
#include "exec/gdbstub.h"
#include "helper.h"
#include "qemu/host-utils.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the architecture */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}
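
/* A quick sketch of the architectural corner cases the two division
 * helpers above guard against (both would be undefined behaviour in
 * plain C, hence the explicit checks):
 *
 *   udiv64(42, 0)         -> 0           (division by zero yields 0)
 *   sdiv64(LLONG_MIN, -1) -> LLONG_MIN   (overflow wraps, no trap)
 */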

uint64_t HELPER(clz64)(uint64_t x)
{
    return clz64(x);
}

uint64_t HELPER(cls64)(uint64_t x)
{
    return clrsb64(x);
}

uint32_t HELPER(cls32)(uint32_t x)
{
    return clrsb32(x);
}
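
/* Note: the AArch64 CLS result (count of consecutive bits matching the
 * sign bit, not including the sign bit itself) coincides exactly with
 * the host-utils clrsb ("count leading redundant sign bits"), e.g.:
 *
 *   cls32(0xffff0000) -> 15
 *   cls32(0x00000000) -> 31
 */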

uint64_t HELPER(rbit64)(uint64_t x)
{
    /* assign the correct byte position */
    x = bswap64(x);

    /* assign the correct nibble position */
    x = ((x & 0xf0f0f0f0f0f0f0f0ULL) >> 4)
        | ((x & 0x0f0f0f0f0f0f0f0fULL) << 4);

    /* assign the correct bit position */
    x = ((x & 0x8888888888888888ULL) >> 3)
        | ((x & 0x4444444444444444ULL) >> 1)
        | ((x & 0x2222222222222222ULL) << 1)
        | ((x & 0x1111111111111111ULL) << 3);

    return x;
}
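
/* Illustrative trace of the three swap stages above for x = 0x02
 * (bit 1 set), which must reverse to bit 62 set:
 *
 *   bswap64:     0x0000000000000002 -> 0x0200000000000000  (byte 0 -> byte 7)
 *   nibble swap: 0x0200000000000000 -> 0x2000000000000000  (low nibble -> high)
 *   bit swap:    0x2000000000000000 -> 0x4000000000000000  (bit 61 -> bit 62)
 */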

/* Convert a softfloat float_relation_* result (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint32_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}
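
/* For reference, the flag combinations above match the architected
 * FCMP/FCMPE results:
 *
 *   comparison    N Z C V
 *   equal         0 1 1 0
 *   less than     1 0 0 0
 *   greater than  0 0 1 0
 *   unordered     0 0 1 1
 */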

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}
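
/* The "e" variants correspond to FCMPE, which raises Invalid Operation
 * for any NaN operand (softfloat's float*_compare); plain FCMP only
 * signals on a signalling NaN (float*_compare_quiet).
 */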

float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}
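
/* FMULX in brief: identical to FMUL except that (0 * infinity), which
 * would otherwise produce the default NaN, returns 2.0 with the sign
 * set to the XOR of the operand signs. A quick sanity check:
 *
 *   mulx(+0.0f, -inf) -> -2.0f   (bit pattern 0xc0000000)
 *
 * since 1U << 30 (0x40000000) is exactly the encoding of 2.0f.
 */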

uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
                          uint32_t rn, uint32_t numregs)
{
    /* Helper function for SIMD TBL and TBX. We have to do the table
     * lookup part for the 64 bits worth of indices we're passed in.
     * result is the initial results vector (either zeroes for TBL
     * or some guest values for TBX), rn the register number where
     * the table starts, and numregs the number of registers in the table.
     * We return the results of the lookups.
     */
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int index = extract64(indices, shift, 8);
        if (index < 16 * numregs) {
            /* Convert index (a byte offset into the virtual table
             * which is a series of 128-bit vectors concatenated)
             * into the correct vfp.regs[] element plus a bit offset
             * into that element, bearing in mind that the table
             * can wrap around from V31 to V0.
             */
            int elt = (rn * 2 + (index >> 3)) % 64;
            int bitidx = (index & 7) * 8;
            uint64_t val = extract64(env->vfp.regs[elt], bitidx, 8);

            result = deposit64(result, shift, 8, val);
        }
    }
    return result;
}
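
/* Worked example of the index arithmetic, taking a table of
 * numregs = 2 starting at rn = 31 (so it wraps from V31 to V0):
 * a lookup byte of 20 selects byte 4 of the second table register,
 * i.e. byte 4 of V0, and indeed
 *
 *   elt    = (31 * 2 + (20 >> 3)) % 64 = (62 + 2) % 64 = 0
 *   bitidx = (20 & 7) * 8 = 32
 *
 * picks out bits [39:32] of vfp.regs[0], which hold byte 4 of V0.
 */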

/* Helper function for 64 bit polynomial multiply case:
 * perform PolynomialMult(op1, op2) and return either the top or
 * bottom half of the 128 bit result.
 */
uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
{
    int bitnum;
    uint64_t res = 0;

    for (bitnum = 0; bitnum < 64; bitnum++) {
        if (op1 & (1ULL << bitnum)) {
            res ^= op2 << bitnum;
        }
    }
    return res;
}

uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
{
    int bitnum;
    uint64_t res = 0;

    /* bit 0 of op1 can't influence the high 64 bits at all */
    for (bitnum = 1; bitnum < 64; bitnum++) {
        if (op1 & (1ULL << bitnum)) {
            res ^= op2 >> (64 - bitnum);
        }
    }
    return res;
}
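
/* Polynomial (carry-less) multiplication treats the operands as
 * polynomials over GF(2), so partial products are combined with XOR
 * rather than addition. A tiny sketch:
 *
 *   pmull_64_lo(0x3, 0x3) = 0x5    ((x + 1)^2 = x^2 + 1: no carry out)
 *   pmull_64_hi(0x3, 0x3) = 0x0    (high half of the 128-bit result)
 *
 * This is the primitive behind PMULL/PMULL2, used by e.g. GHASH in
 * AES-GCM.
 */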

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_lt(b, a, fpst);
}
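
/* Two idioms worth noting above: the softfloat predicates return 0 or 1,
 * so negating yields the all-zeroes/all-ones mask the SIMD compares must
 * produce; and "a >= b" is evaluated as "b <= a" (likewise for >), which
 * gives the required all-false result when either input is a NaN.
 */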

/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 */
#define float32_two make_float32(0x40000000)
#define float32_three make_float32(0x40400000)
#define float32_one_point_five make_float32(0x3fc00000)

#define float64_two make_float64(0x4000000000000000ULL)
#define float64_three make_float64(0x4008000000000000ULL)
#define float64_one_point_five make_float64(0x3FF8000000000000ULL)

float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
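
/* In formula terms, FRECPS computes 2 - a * b and FRSQRTS computes
 * (3 - a * b) / 2, each as a single fused operation (hence the negated
 * first operand and the fused muladd above). These are the correction
 * factors for Newton-Raphson refinement of reciprocal and
 * reciprocal-square-root estimates:
 *
 *   x' = x * (2 - a * x)           converges to 1 / a
 *   x' = x * (3 - a * x * x) / 2   converges to 1 / sqrt(a)
 */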