]>
Commit | Line | Data |
---|---|---|
b3e22b23 PB |
1 | /* |
2 | * New-style TCG opcode generator for i386 instructions | |
3 | * | |
4 | * Copyright (c) 2022 Red Hat, Inc. | |
5 | * | |
6 | * Author: Paolo Bonzini <pbonzini@redhat.com> | |
7 | * | |
8 | * This library is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License as published by the Free Software Foundation; either | |
11 | * version 2.1 of the License, or (at your option) any later version. | |
12 | * | |
13 | * This library is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * Lesser General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU Lesser General Public | |
19 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
20 | */ | |
21 | ||
653fad24 PB |
/* Byte offset, within CPUX86State, of element 'reg' of the xmm_regs[] array. */
#define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg])
23 | ||
/*
 * Function-pointer types for the code-generation callbacks used below.
 *
 * The trailing letters of each name list the argument types in order, as can
 * be read off the prototypes: 'e' is the TCGv_ptr env argument, 'p' a TCGv_ptr
 * to a vector register, 'i' a TCGv_i32 and 't' a TCGv.  The "_i_" and "_l_"
 * variants additionally take a leading TCGv_i32 / TCGv_i64 'val' argument
 * (presumably the value produced by the helper -- confirm against the helper
 * declarations); the "_0_" variants have no such argument.
 */
typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg);
typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b);
typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_ptr reg_c);
typedef void (*SSEFunc_0_epppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                TCGv_ptr reg_c, TCGv_ptr reg_d);
typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_epppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                TCGv_ptr reg_c, TCGv_i32 val);
typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val);
typedef void (*SSEFunc_0_pppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr reg_c,
                               TCGv_i32 val);
typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                               TCGv val);
typedef void (*SSEFunc_0_epppti)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                 TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale);
typedef void (*SSEFunc_0_eppppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                  TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 flags);
typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b,
                                   TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even,
                                   TCGv_i32 odd);
16fc5726 | 47 | |
ce4fcb94 PB |
48 | static inline TCGv_i32 tcg_constant8u_i32(uint8_t val) |
49 | { | |
50 | return tcg_constant_i32(val); | |
51 | } | |
52 | ||
20581aad PB |
/* Emit code raising exception EXCP07_PREX through gen_exception(). */
static void gen_NM_exception(DisasContext *s)
{
    gen_exception(s, EXCP07_PREX);
}
57 | ||
b3e22b23 PB |
/*
 * Emitter with the common (s, env, decode) signature that unconditionally
 * generates an illegal-opcode exception; env and decode are unused.
 */
static void gen_illegal(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
{
    gen_illegal_opcode(s);
}
62 | ||
/*
 * Generate the effective address for a decoded memory operand.
 *
 * The address is first formed from the modrm parts in *mem (is_vsib is
 * forwarded to gen_lea_modrm_1), and then handed to gen_lea_v_seg together
 * with the current address size, the operand's default segment and the
 * segment override.  Presumably the final address ends up in s->A0, which
 * is what the load/store helpers in this file read -- confirm in
 * gen_lea_v_seg.
 */
static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib)
{
    TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib);
    gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override);
}
6ba13999 PB |
68 | |
69 | static inline int mmx_offset(MemOp ot) | |
70 | { | |
71 | switch (ot) { | |
72 | case MO_8: | |
73 | return offsetof(MMXReg, MMX_B(0)); | |
74 | case MO_16: | |
75 | return offsetof(MMXReg, MMX_W(0)); | |
76 | case MO_32: | |
77 | return offsetof(MMXReg, MMX_L(0)); | |
78 | case MO_64: | |
79 | return offsetof(MMXReg, MMX_Q(0)); | |
80 | default: | |
81 | g_assert_not_reached(); | |
82 | } | |
83 | } | |
84 | ||
85 | static inline int xmm_offset(MemOp ot) | |
86 | { | |
87 | switch (ot) { | |
88 | case MO_8: | |
89 | return offsetof(ZMMReg, ZMM_B(0)); | |
90 | case MO_16: | |
91 | return offsetof(ZMMReg, ZMM_W(0)); | |
92 | case MO_32: | |
93 | return offsetof(ZMMReg, ZMM_L(0)); | |
94 | case MO_64: | |
95 | return offsetof(ZMMReg, ZMM_Q(0)); | |
96 | case MO_128: | |
97 | return offsetof(ZMMReg, ZMM_X(0)); | |
98 | case MO_256: | |
99 | return offsetof(ZMMReg, ZMM_Y(0)); | |
100 | default: | |
101 | g_assert_not_reached(); | |
102 | } | |
103 | } | |
104 | ||
92ec056a PB |
105 | static int vector_reg_offset(X86DecodedOp *op) |
106 | { | |
107 | assert(op->unit == X86_OP_MMX || op->unit == X86_OP_SSE); | |
108 | ||
109 | if (op->unit == X86_OP_MMX) { | |
110 | return op->offset - mmx_offset(op->ot); | |
111 | } else { | |
112 | return op->offset - xmm_offset(op->ot); | |
113 | } | |
114 | } | |
115 | ||
116 | static int vector_elem_offset(X86DecodedOp *op, MemOp ot, int n) | |
117 | { | |
118 | int base_ofs = vector_reg_offset(op); | |
119 | switch(ot) { | |
120 | case MO_8: | |
121 | if (op->unit == X86_OP_MMX) { | |
122 | return base_ofs + offsetof(MMXReg, MMX_B(n)); | |
123 | } else { | |
124 | return base_ofs + offsetof(ZMMReg, ZMM_B(n)); | |
125 | } | |
126 | case MO_16: | |
127 | if (op->unit == X86_OP_MMX) { | |
128 | return base_ofs + offsetof(MMXReg, MMX_W(n)); | |
129 | } else { | |
130 | return base_ofs + offsetof(ZMMReg, ZMM_W(n)); | |
131 | } | |
132 | case MO_32: | |
133 | if (op->unit == X86_OP_MMX) { | |
134 | return base_ofs + offsetof(MMXReg, MMX_L(n)); | |
135 | } else { | |
136 | return base_ofs + offsetof(ZMMReg, ZMM_L(n)); | |
137 | } | |
138 | case MO_64: | |
139 | if (op->unit == X86_OP_MMX) { | |
140 | return base_ofs; | |
141 | } else { | |
142 | return base_ofs + offsetof(ZMMReg, ZMM_Q(n)); | |
143 | } | |
144 | case MO_128: | |
145 | assert(op->unit == X86_OP_SSE); | |
146 | return base_ofs + offsetof(ZMMReg, ZMM_X(n)); | |
147 | case MO_256: | |
148 | assert(op->unit == X86_OP_SSE); | |
149 | return base_ofs + offsetof(ZMMReg, ZMM_Y(n)); | |
150 | default: | |
151 | g_assert_not_reached(); | |
152 | } | |
153 | } | |
154 | ||
6ba13999 PB |
155 | static void compute_mmx_offset(X86DecodedOp *op) |
156 | { | |
157 | if (!op->has_ea) { | |
158 | op->offset = offsetof(CPUX86State, fpregs[op->n].mmx) + mmx_offset(op->ot); | |
159 | } else { | |
160 | op->offset = offsetof(CPUX86State, mmx_t0) + mmx_offset(op->ot); | |
161 | } | |
162 | } | |
163 | ||
164 | static void compute_xmm_offset(X86DecodedOp *op) | |
165 | { | |
166 | if (!op->has_ea) { | |
167 | op->offset = ZMM_OFFSET(op->n) + xmm_offset(op->ot); | |
168 | } else { | |
169 | op->offset = offsetof(CPUX86State, xmm_t0) + xmm_offset(op->ot); | |
170 | } | |
171 | } | |
172 | ||
173 | static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs, bool aligned) | |
174 | { | |
175 | switch(ot) { | |
176 | case MO_8: | |
177 | gen_op_ld_v(s, MO_8, temp, s->A0); | |
ad75a51e | 178 | tcg_gen_st8_tl(temp, tcg_env, dest_ofs); |
6ba13999 PB |
179 | break; |
180 | case MO_16: | |
181 | gen_op_ld_v(s, MO_16, temp, s->A0); | |
ad75a51e | 182 | tcg_gen_st16_tl(temp, tcg_env, dest_ofs); |
6ba13999 PB |
183 | break; |
184 | case MO_32: | |
185 | gen_op_ld_v(s, MO_32, temp, s->A0); | |
ad75a51e | 186 | tcg_gen_st32_tl(temp, tcg_env, dest_ofs); |
6ba13999 PB |
187 | break; |
188 | case MO_64: | |
189 | gen_ldq_env_A0(s, dest_ofs); | |
190 | break; | |
191 | case MO_128: | |
192 | gen_ldo_env_A0(s, dest_ofs, aligned); | |
193 | break; | |
194 | case MO_256: | |
195 | gen_ldy_env_A0(s, dest_ofs, aligned); | |
196 | break; | |
197 | default: | |
198 | g_assert_not_reached(); | |
199 | } | |
200 | } | |
201 | ||
20581aad PB |
202 | static bool sse_needs_alignment(DisasContext *s, X86DecodedInsn *decode, MemOp ot) |
203 | { | |
204 | switch (decode->e.vex_class) { | |
205 | case 2: | |
206 | case 4: | |
207 | if ((s->prefix & PREFIX_VEX) || | |
208 | decode->e.vex_special == X86_VEX_SSEUnaligned) { | |
209 | /* MOST legacy SSE instructions require aligned memory operands, but not all. */ | |
210 | return false; | |
211 | } | |
212 | /* fall through */ | |
213 | case 1: | |
214 | return ot >= MO_128; | |
215 | ||
216 | default: | |
217 | return false; | |
218 | } | |
219 | } | |
220 | ||
6ba13999 PB |
/*
 * Generate code that fetches operand 'opn' of the decoded instruction.
 *
 * Scalar operands (segment selector, control/debug register, integer
 * register or memory, immediate) are loaded into 'v'.  For MMX/SSE operands
 * op->offset is computed; a memory operand is additionally copied from the
 * address in s->A0 into the scratch slot designated by op->offset, using
 * 'v' as a temporary for the narrow sizes.  Register vector operands need
 * no code at all.
 */
static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
{
    X86DecodedOp *op = &decode->op[opn];

    switch (op->unit) {
    case X86_OP_SKIP:
        return;
    case X86_OP_SEG:
        tcg_gen_ld32u_tl(v, tcg_env,
                         offsetof(CPUX86State,segs[op->n].selector));
        break;
    case X86_OP_CR:
        tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n]));
        break;
    case X86_OP_DR:
        tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, dr[op->n]));
        break;
    case X86_OP_INT:
        if (op->has_ea) {
            gen_op_ld_v(s, op->ot, v, s->A0);
        } else {
            gen_op_mov_v_reg(s, op->ot, v, op->n);
        }
        break;
    case X86_OP_IMM:
        tcg_gen_movi_tl(v, decode->immediate);
        break;

    case X86_OP_MMX:
        compute_mmx_offset(op);
        /* Share the memory-operand handling with the SSE case. */
        goto load_vector;

    case X86_OP_SSE:
        compute_xmm_offset(op);
    load_vector:
        if (op->has_ea) {
            bool aligned = sse_needs_alignment(s, decode, op->ot);
            gen_load_sse(s, v, op->ot, op->offset, aligned);
        }
        break;

    default:
        g_assert_not_reached();
    }
}
266 | ||
92ec056a PB |
267 | static TCGv_ptr op_ptr(X86DecodedInsn *decode, int opn) |
268 | { | |
269 | X86DecodedOp *op = &decode->op[opn]; | |
270 | if (op->v_ptr) { | |
271 | return op->v_ptr; | |
272 | } | |
273 | op->v_ptr = tcg_temp_new_ptr(); | |
274 | ||
275 | /* The temporary points to the MMXReg or ZMMReg. */ | |
ad75a51e | 276 | tcg_gen_addi_ptr(op->v_ptr, tcg_env, vector_reg_offset(op)); |
92ec056a PB |
277 | return op->v_ptr; |
278 | } | |
279 | ||
280 | #define OP_PTR0 op_ptr(decode, 0) | |
281 | #define OP_PTR1 op_ptr(decode, 1) | |
282 | #define OP_PTR2 op_ptr(decode, 2) | |
283 | ||
6ba13999 PB |
/*
 * Generate code that commits the result of operand 'opn'.
 *
 * Integer results in 'v' are stored to memory at s->A0 or to register
 * op->n.  MMX and SSE results need no store here; the only work is clearing
 * part of the destination for VEX-encoded instructions (see below).
 * Control and debug registers are not valid writeback targets and assert.
 */
static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v)
{
    X86DecodedOp *op = &decode->op[opn];
    switch (op->unit) {
    case X86_OP_SKIP:
        break;
    case X86_OP_SEG:
        /* Note that gen_movl_seg_T0 takes care of interrupt shadow and TF.  */
        /* NOTE(review): 'v' is not passed; the helper presumably reads T0. */
        gen_movl_seg_T0(s, op->n);
        break;
    case X86_OP_INT:
        if (op->has_ea) {
            gen_op_st_v(s, op->ot, v, s->A0);
        } else {
            gen_op_mov_reg_v(s, op->ot, op->n, v);
        }
        break;
    case X86_OP_MMX:
        break;
    case X86_OP_SSE:
        /*
         * A VEX-encoded instruction writing a register of at most 128 bits
         * zeroes the 16 bytes starting at ZMM_X(1) of that register.
         */
        if (!op->has_ea && (s->prefix & PREFIX_VEX) && op->ot <= MO_128) {
            tcg_gen_gvec_dup_imm(MO_64,
                                 offsetof(CPUX86State, xmm_regs[op->n].ZMM_X(1)),
                                 16, 16, 0);
        }
        break;
    case X86_OP_CR:
    case X86_OP_DR:
    default:
        g_assert_not_reached();
    }
}
1d0b9261 | 316 | |
92ec056a PB |
317 | static inline int vector_len(DisasContext *s, X86DecodedInsn *decode) |
318 | { | |
319 | if (decode->e.special == X86_SPECIAL_MMX && | |
320 | !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { | |
321 | return 8; | |
322 | } | |
323 | return s->vex_l ? 32 : 16; | |
324 | } | |
325 | ||
326 | static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs) | |
327 | { | |
328 | MemOp ot = decode->op[0].ot; | |
329 | int vec_len = vector_len(s, decode); | |
330 | bool aligned = sse_needs_alignment(s, decode, ot); | |
331 | ||
332 | if (!decode->op[0].has_ea) { | |
333 | tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, vec_len, vec_len); | |
334 | return; | |
335 | } | |
336 | ||
337 | switch (ot) { | |
338 | case MO_64: | |
339 | gen_stq_env_A0(s, src_ofs); | |
340 | break; | |
341 | case MO_128: | |
342 | gen_sto_env_A0(s, src_ofs, aligned); | |
343 | break; | |
344 | case MO_256: | |
345 | gen_sty_env_A0(s, src_ofs, aligned); | |
346 | break; | |
347 | default: | |
348 | g_assert_not_reached(); | |
349 | } | |
350 | } | |
351 | ||
71a0891d PB |
/*
 * Adapter giving PAVGUSB the two-register SSEFunc_0_epp signature: it calls
 * the three-operand pavgb_mmx helper with reg_a used both as the destination
 * and as the first source.
 */
static void gen_helper_pavgusb(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b)
{
    gen_helper_pavgb_mmx(env, reg_a, reg_a, reg_b);
}
356 | ||
357 | #define FN_3DNOW_MOVE ((SSEFunc_0_epp) (uintptr_t) 1) | |
358 | static const SSEFunc_0_epp fns_3dnow[] = { | |
359 | [0x0c] = gen_helper_pi2fw, | |
360 | [0x0d] = gen_helper_pi2fd, | |
361 | [0x1c] = gen_helper_pf2iw, | |
362 | [0x1d] = gen_helper_pf2id, | |
363 | [0x8a] = gen_helper_pfnacc, | |
364 | [0x8e] = gen_helper_pfpnacc, | |
365 | [0x90] = gen_helper_pfcmpge, | |
366 | [0x94] = gen_helper_pfmin, | |
367 | [0x96] = gen_helper_pfrcp, | |
368 | [0x97] = gen_helper_pfrsqrt, | |
369 | [0x9a] = gen_helper_pfsub, | |
370 | [0x9e] = gen_helper_pfadd, | |
371 | [0xa0] = gen_helper_pfcmpgt, | |
372 | [0xa4] = gen_helper_pfmax, | |
373 | [0xa6] = FN_3DNOW_MOVE, /* PFRCPIT1; no need to actually increase precision */ | |
374 | [0xa7] = FN_3DNOW_MOVE, /* PFRSQIT1 */ | |
375 | [0xb6] = FN_3DNOW_MOVE, /* PFRCPIT2 */ | |
376 | [0xaa] = gen_helper_pfsubr, | |
377 | [0xae] = gen_helper_pfacc, | |
378 | [0xb0] = gen_helper_pfcmpeq, | |
379 | [0xb4] = gen_helper_pfmul, | |
380 | [0xb7] = gen_helper_pmulhrw_mmx, | |
381 | [0xbb] = gen_helper_pswapd, | |
382 | [0xbf] = gen_helper_pavgusb, | |
383 | }; | |
384 | ||
385 | static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
386 | { | |
387 | uint8_t b = decode->immediate; | |
388 | SSEFunc_0_epp fn = b < ARRAY_SIZE(fns_3dnow) ? fns_3dnow[b] : NULL; | |
389 | ||
390 | if (!fn) { | |
391 | gen_illegal_opcode(s); | |
392 | return; | |
393 | } | |
394 | if (s->flags & HF_TS_MASK) { | |
395 | gen_NM_exception(s); | |
396 | return; | |
397 | } | |
398 | if (s->flags & HF_EM_MASK) { | |
399 | gen_illegal_opcode(s); | |
400 | return; | |
401 | } | |
402 | ||
ad75a51e | 403 | gen_helper_enter_mmx(tcg_env); |
71a0891d | 404 | if (fn == FN_3DNOW_MOVE) { |
ad75a51e RH |
405 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset); |
406 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset); | |
71a0891d | 407 | } else { |
ad75a51e | 408 | fn(tcg_env, OP_PTR0, OP_PTR1); |
71a0891d PB |
409 | } |
410 | } | |
411 | ||
03b45880 PB |
412 | /* |
413 | * 00 = v*ps Vps, Hps, Wpd | |
414 | * 66 = v*pd Vpd, Hpd, Wps | |
415 | * f3 = v*ss Vss, Hss, Wps | |
416 | * f2 = v*sd Vsd, Hsd, Wps | |
417 | */ | |
418 | static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
419 | SSEFunc_0_epp pd_xmm, SSEFunc_0_epp ps_xmm, | |
420 | SSEFunc_0_epp pd_ymm, SSEFunc_0_epp ps_ymm, | |
421 | SSEFunc_0_eppp sd, SSEFunc_0_eppp ss) | |
422 | { | |
423 | if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) != 0) { | |
424 | SSEFunc_0_eppp fn = s->prefix & PREFIX_REPZ ? ss : sd; | |
425 | if (!fn) { | |
426 | gen_illegal_opcode(s); | |
427 | return; | |
428 | } | |
ad75a51e | 429 | fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
03b45880 PB |
430 | } else { |
431 | SSEFunc_0_epp ps, pd, fn; | |
432 | ps = s->vex_l ? ps_ymm : ps_xmm; | |
433 | pd = s->vex_l ? pd_ymm : pd_xmm; | |
434 | fn = s->prefix & PREFIX_DATA ? pd : ps; | |
435 | if (!fn) { | |
436 | gen_illegal_opcode(s); | |
437 | return; | |
438 | } | |
ad75a51e | 439 | fn(tcg_env, OP_PTR0, OP_PTR2); |
03b45880 PB |
440 | } |
441 | } | |
/*
 * Define gen_<uname>() as a gen_unary_fp_sse() dispatcher over the six
 * helpers gen_helper_<lname>{pd,ps}_{xmm,ymm} and gen_helper_<lname>{sd,ss}.
 */
#define UNARY_FP_SSE(uname, lname)                                                 \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    gen_unary_fp_sse(s, env, decode,                                               \
                     gen_helper_##lname##pd_xmm,                                   \
                     gen_helper_##lname##ps_xmm,                                   \
                     gen_helper_##lname##pd_ymm,                                   \
                     gen_helper_##lname##ps_ymm,                                   \
                     gen_helper_##lname##sd,                                       \
                     gen_helper_##lname##ss);                                      \
}
UNARY_FP_SSE(VSQRT, sqrt)
454 | ||
455 | /* | |
456 | * 00 = v*ps Vps, Hps, Wpd | |
457 | * 66 = v*pd Vpd, Hpd, Wps | |
458 | * f3 = v*ss Vss, Hss, Wps | |
459 | * f2 = v*sd Vsd, Hsd, Wps | |
460 | */ | |
461 | static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
462 | SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm, | |
463 | SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm, | |
464 | SSEFunc_0_eppp sd, SSEFunc_0_eppp ss) | |
465 | { | |
466 | SSEFunc_0_eppp ps, pd, fn; | |
467 | if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) != 0) { | |
468 | fn = s->prefix & PREFIX_REPZ ? ss : sd; | |
469 | } else { | |
470 | ps = s->vex_l ? ps_ymm : ps_xmm; | |
471 | pd = s->vex_l ? pd_ymm : pd_xmm; | |
472 | fn = s->prefix & PREFIX_DATA ? pd : ps; | |
473 | } | |
474 | if (fn) { | |
ad75a51e | 475 | fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
03b45880 PB |
476 | } else { |
477 | gen_illegal_opcode(s); | |
478 | } | |
479 | } | |
7170a17e | 480 | |
03b45880 PB |
/*
 * Define gen_<uname>() as a gen_fp_sse() dispatcher over the six helpers
 * gen_helper_<lname>{pd,ps}_{xmm,ymm} and gen_helper_<lname>{sd,ss}.
 */
#define FP_SSE(uname, lname)                                                       \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    gen_fp_sse(s, env, decode,                                                     \
               gen_helper_##lname##pd_xmm,                                         \
               gen_helper_##lname##ps_xmm,                                         \
               gen_helper_##lname##pd_ymm,                                         \
               gen_helper_##lname##ps_ymm,                                         \
               gen_helper_##lname##sd,                                             \
               gen_helper_##lname##ss);                                            \
}
FP_SSE(VADD, add)
FP_SSE(VMUL, mul)
FP_SSE(VSUB, sub)
FP_SSE(VMIN, min)
FP_SSE(VDIV, div)
FP_SSE(VMAX, max)
498 | ||
2872b0f3 PB |
/*
 * Define gen_<uname>Px(), the packed form of a fused multiply-add.
 *
 * VEX.W selects the double- or single-precision fma4 helper and VEX.L the
 * ymm or xmm variant.  Operand 0 is always the destination; ptr0..ptr2 say
 * which decoded operands are passed as the helper's three inputs.  The two
 * trailing i32 arguments are the constant 'even' and the constant
 * 'even ^ odd'.
 */
#define FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, even, odd)                          \
static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    SSEFunc_0_eppppii xmm = s->vex_w ? gen_helper_fma4pd_xmm : gen_helper_fma4ps_xmm; \
    SSEFunc_0_eppppii ymm = s->vex_w ? gen_helper_fma4pd_ymm : gen_helper_fma4ps_ymm; \
    SSEFunc_0_eppppii fn = s->vex_l ? ymm : xmm;                                   \
                                                                                   \
    fn(tcg_env, OP_PTR0, ptr0, ptr1, ptr2,                                         \
       tcg_constant_i32(even),                                                     \
       tcg_constant_i32((even) ^ (odd)));                                          \
}

/*
 * Define both gen_<uname>Px() (with the same flags for even and odd) and the
 * scalar form gen_<uname>Sx(), which calls the fma4sd/fma4ss helper chosen
 * by VEX.W with 'flags' as its last argument.
 */
#define FMA_SSE(uname, ptr0, ptr1, ptr2, flags)                                    \
FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, flags, flags)                              \
static void gen_##uname##Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    SSEFunc_0_eppppi fn = s->vex_w ? gen_helper_fma4sd : gen_helper_fma4ss;        \
                                                                                   \
    fn(tcg_env, OP_PTR0, ptr0, ptr1, ptr2,                                         \
       tcg_constant_i32(flags));                                                   \
}                                                                                  \

FMA_SSE(VFMADD231,  OP_PTR1, OP_PTR2, OP_PTR0, 0)
FMA_SSE(VFMADD213,  OP_PTR1, OP_PTR0, OP_PTR2, 0)
FMA_SSE(VFMADD132,  OP_PTR0, OP_PTR2, OP_PTR1, 0)

FMA_SSE(VFNMADD231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_product)
FMA_SSE(VFNMADD213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_product)
FMA_SSE(VFNMADD132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_product)

FMA_SSE(VFMSUB231,  OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c)
FMA_SSE(VFMSUB213,  OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c)
FMA_SSE(VFMSUB132,  OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c)

FMA_SSE(VFNMSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c|float_muladd_negate_product)
FMA_SSE(VFNMSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c|float_muladd_negate_product)
FMA_SSE(VFNMSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c|float_muladd_negate_product)

/* Alternating add/subtract: the even and odd flags differ. */
FMA_SSE_PACKED(VFMADDSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c, 0)
FMA_SSE_PACKED(VFMADDSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c, 0)
FMA_SSE_PACKED(VFMADDSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c, 0)

FMA_SSE_PACKED(VFMSUBADD231, OP_PTR1, OP_PTR2, OP_PTR0, 0, float_muladd_negate_c)
FMA_SSE_PACKED(VFMSUBADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0, float_muladd_negate_c)
FMA_SSE_PACKED(VFMSUBADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0, float_muladd_negate_c)
544 | ||
7170a17e PB |
/*
 * Define gen_<uname>() for the floating-point unpack instructions by reusing
 * the integer unpack helpers through gen_fp_sse().  No scalar helpers are
 * supplied, so the f3/f2-prefixed forms raise an illegal-opcode exception.
 */
#define FP_UNPACK_SSE(uname, lname)                                                \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    /* PS maps to the DQ integer instruction, PD maps to QDQ.  */                  \
    gen_fp_sse(s, env, decode,                                                     \
               gen_helper_##lname##qdq_xmm,                                        \
               gen_helper_##lname##dq_xmm,                                         \
               gen_helper_##lname##qdq_ymm,                                        \
               gen_helper_##lname##dq_ymm,                                         \
               NULL, NULL);                                                        \
}
FP_UNPACK_SSE(VUNPCKLPx, punpckl)
FP_UNPACK_SSE(VUNPCKHPx, punpckh)
558 | ||
03b45880 PB |
559 | /* |
560 | * 00 = v*ps Vps, Wpd | |
561 | * f3 = v*ss Vss, Wps | |
562 | */ | |
563 | static inline void gen_unary_fp32_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
564 | SSEFunc_0_epp ps_xmm, | |
565 | SSEFunc_0_epp ps_ymm, | |
566 | SSEFunc_0_eppp ss) | |
567 | { | |
568 | if ((s->prefix & (PREFIX_DATA | PREFIX_REPNZ)) != 0) { | |
569 | goto illegal_op; | |
570 | } else if (s->prefix & PREFIX_REPZ) { | |
571 | if (!ss) { | |
572 | goto illegal_op; | |
573 | } | |
ad75a51e | 574 | ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
03b45880 PB |
575 | } else { |
576 | SSEFunc_0_epp fn = s->vex_l ? ps_ymm : ps_xmm; | |
577 | if (!fn) { | |
578 | goto illegal_op; | |
579 | } | |
ad75a51e | 580 | fn(tcg_env, OP_PTR0, OP_PTR2); |
03b45880 PB |
581 | } |
582 | return; | |
583 | ||
584 | illegal_op: | |
585 | gen_illegal_opcode(s); | |
586 | } | |
/*
 * Define gen_<uname>() as a gen_unary_fp32_sse() dispatcher over
 * gen_helper_<lname>ps_{xmm,ymm} and gen_helper_<lname>ss.
 */
#define UNARY_FP32_SSE(uname, lname)                                               \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    gen_unary_fp32_sse(s, env, decode,                                             \
                       gen_helper_##lname##ps_xmm,                                 \
                       gen_helper_##lname##ps_ymm,                                 \
                       gen_helper_##lname##ss);                                    \
}
UNARY_FP32_SSE(VRSQRT, rsqrt)
UNARY_FP32_SSE(VRCP, rcp)
597 | ||
d1c1a422 PB |
598 | /* |
599 | * 66 = v*pd Vpd, Hpd, Wpd | |
600 | * f2 = v*ps Vps, Hps, Wps | |
601 | */ | |
602 | static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
603 | SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm, | |
604 | SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm) | |
605 | { | |
606 | SSEFunc_0_eppp ps, pd, fn; | |
607 | ps = s->vex_l ? ps_ymm : ps_xmm; | |
608 | pd = s->vex_l ? pd_ymm : pd_xmm; | |
609 | fn = s->prefix & PREFIX_DATA ? pd : ps; | |
ad75a51e | 610 | fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
d1c1a422 PB |
611 | } |
/*
 * Define gen_<uname>() as a gen_horizontal_fp_sse() dispatcher over
 * gen_helper_<lname>{pd,ps}_{xmm,ymm}.
 */
#define HORIZONTAL_FP_SSE(uname, lname)                                            \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    gen_horizontal_fp_sse(s, env, decode,                                          \
                          gen_helper_##lname##pd_xmm, gen_helper_##lname##ps_xmm,  \
                          gen_helper_##lname##pd_ymm, gen_helper_##lname##ps_ymm); \
}
HORIZONTAL_FP_SSE(VHADD, hadd)
HORIZONTAL_FP_SSE(VHSUB, hsub)
HORIZONTAL_FP_SSE(VADDSUB, addsub)
d1c1a422 | 622 | |
79068477 PB |
623 | static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, |
624 | int op3, SSEFunc_0_epppp xmm, SSEFunc_0_epppp ymm) | |
625 | { | |
626 | SSEFunc_0_epppp fn = s->vex_l ? ymm : xmm; | |
627 | TCGv_ptr ptr3 = tcg_temp_new_ptr(); | |
628 | ||
629 | /* The format of the fourth input is Lx */ | |
ad75a51e RH |
630 | tcg_gen_addi_ptr(ptr3, tcg_env, ZMM_OFFSET(op3)); |
631 | fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, ptr3); | |
79068477 | 632 | } |
/*
 * Define two emitters sharing gen_helper_<lname>_{xmm,ymm}:
 *  - gen_<uvname>() takes the extra source register number from bits 7:4
 *    of the immediate byte;
 *  - gen_<uname>() always uses vector register 0 as the extra source.
 */
#define TERNARY_SSE(uname, uvname, lname)                                          \
static void gen_##uvname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    gen_ternary_sse(s, env, decode, (uint8_t)decode->immediate >> 4,               \
                    gen_helper_##lname##_xmm, gen_helper_##lname##_ymm);           \
}                                                                                  \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    gen_ternary_sse(s, env, decode, 0,                                             \
                    gen_helper_##lname##_xmm, gen_helper_##lname##_ymm);           \
}
TERNARY_SSE(BLENDVPS, VBLENDVPS, blendvps)
TERNARY_SSE(BLENDVPD, VBLENDVPD, blendvpd)
TERNARY_SSE(PBLENDVB, VPBLENDVB, pblendvb)
79068477 PB |
647 | |
648 | static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
649 | SSEFunc_0_epppi xmm, SSEFunc_0_epppi ymm) | |
650 | { | |
651 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
652 | if (!s->vex_l) { | |
ad75a51e | 653 | xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); |
79068477 | 654 | } else { |
ad75a51e | 655 | ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); |
79068477 PB |
656 | } |
657 | } | |
658 | ||
/*
 * Define gen_<uname>() as a gen_binary_imm_sse() dispatcher over
 * gen_helper_<lname>_{xmm,ymm}.
 */
#define BINARY_IMM_SSE(uname, lname)                                               \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    gen_binary_imm_sse(s, env, decode,                                             \
                       gen_helper_##lname##_xmm,                                   \
                       gen_helper_##lname##_ymm);                                  \
}

BINARY_IMM_SSE(VBLENDPD,      blendpd)
BINARY_IMM_SSE(VBLENDPS,      blendps)
BINARY_IMM_SSE(VPBLENDW,      pblendw)
BINARY_IMM_SSE(VDDPS,         dpps)
/* There is no dppd ymm helper; the macro below passes NULL in its place. */
#define gen_helper_dppd_ymm NULL
BINARY_IMM_SSE(VDDPD,         dppd)
BINARY_IMM_SSE(VMPSADBW,      mpsadbw)
BINARY_IMM_SSE(PCLMULQDQ,     pclmulqdq)
675 | ||
16fc5726 PB |
676 | |
/*
 * Define gen_<uname>() as a call to the gvec expander 'func' with operand 0
 * as destination and operand 2 as source.  The variadic arguments are
 * passed before the offsets, and vector_len() supplies both size arguments.
 */
#define UNARY_INT_GVEC(uname, func, ...)                                           \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    int vec_len = vector_len(s, decode);                                           \
                                                                                   \
    func(__VA_ARGS__, decode->op[0].offset,                                        \
         decode->op[2].offset, vec_len, vec_len);                                  \
}
UNARY_INT_GVEC(PABSB,          tcg_gen_gvec_abs, MO_8)
UNARY_INT_GVEC(PABSW,          tcg_gen_gvec_abs, MO_16)
UNARY_INT_GVEC(PABSD,          tcg_gen_gvec_abs, MO_32)
UNARY_INT_GVEC(VBROADCASTx128, tcg_gen_gvec_dup_mem, MO_128)
UNARY_INT_GVEC(VPBROADCASTB,   tcg_gen_gvec_dup_mem, MO_8)
UNARY_INT_GVEC(VPBROADCASTW,   tcg_gen_gvec_dup_mem, MO_16)
UNARY_INT_GVEC(VPBROADCASTD,   tcg_gen_gvec_dup_mem, MO_32)
UNARY_INT_GVEC(VPBROADCASTQ,   tcg_gen_gvec_dup_mem, MO_64)
693 | ||
694 | ||
92ec056a PB |
/*
 * Define gen_<uname>() as a call to the gvec expander 'func' with operand 0
 * as destination and operands 1 and 2 as sources.  The variadic arguments
 * (element size, and the condition for comparisons) are passed before the
 * offsets, and vector_len() supplies both size arguments.
 */
#define BINARY_INT_GVEC(uname, func, ...)                                          \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    int vec_len = vector_len(s, decode);                                           \
                                                                                   \
    func(__VA_ARGS__,                                                              \
         decode->op[0].offset, decode->op[1].offset,                               \
         decode->op[2].offset, vec_len, vec_len);                                  \
}

BINARY_INT_GVEC(PADDB,   tcg_gen_gvec_add, MO_8)
BINARY_INT_GVEC(PADDW,   tcg_gen_gvec_add, MO_16)
BINARY_INT_GVEC(PADDD,   tcg_gen_gvec_add, MO_32)
BINARY_INT_GVEC(PADDQ,   tcg_gen_gvec_add, MO_64)
BINARY_INT_GVEC(PADDSB,  tcg_gen_gvec_ssadd, MO_8)
BINARY_INT_GVEC(PADDSW,  tcg_gen_gvec_ssadd, MO_16)
BINARY_INT_GVEC(PADDUSB, tcg_gen_gvec_usadd, MO_8)
BINARY_INT_GVEC(PADDUSW, tcg_gen_gvec_usadd, MO_16)
BINARY_INT_GVEC(PAND,    tcg_gen_gvec_and, MO_64)
BINARY_INT_GVEC(PCMPEQB, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_8)
BINARY_INT_GVEC(PCMPEQD, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_32)
BINARY_INT_GVEC(PCMPEQW, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_16)
BINARY_INT_GVEC(PCMPEQQ, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_64)
BINARY_INT_GVEC(PCMPGTB, tcg_gen_gvec_cmp, TCG_COND_GT, MO_8)
BINARY_INT_GVEC(PCMPGTW, tcg_gen_gvec_cmp, TCG_COND_GT, MO_16)
BINARY_INT_GVEC(PCMPGTD, tcg_gen_gvec_cmp, TCG_COND_GT, MO_32)
BINARY_INT_GVEC(PCMPGTQ, tcg_gen_gvec_cmp, TCG_COND_GT, MO_64)
BINARY_INT_GVEC(PMAXSB,  tcg_gen_gvec_smax, MO_8)
BINARY_INT_GVEC(PMAXSW,  tcg_gen_gvec_smax, MO_16)
BINARY_INT_GVEC(PMAXSD,  tcg_gen_gvec_smax, MO_32)
BINARY_INT_GVEC(PMAXUB,  tcg_gen_gvec_umax, MO_8)
BINARY_INT_GVEC(PMAXUW,  tcg_gen_gvec_umax, MO_16)
BINARY_INT_GVEC(PMAXUD,  tcg_gen_gvec_umax, MO_32)
BINARY_INT_GVEC(PMINSB,  tcg_gen_gvec_smin, MO_8)
BINARY_INT_GVEC(PMINSW,  tcg_gen_gvec_smin, MO_16)
BINARY_INT_GVEC(PMINSD,  tcg_gen_gvec_smin, MO_32)
BINARY_INT_GVEC(PMINUB,  tcg_gen_gvec_umin, MO_8)
BINARY_INT_GVEC(PMINUW,  tcg_gen_gvec_umin, MO_16)
BINARY_INT_GVEC(PMINUD,  tcg_gen_gvec_umin, MO_32)
BINARY_INT_GVEC(PMULLW,  tcg_gen_gvec_mul, MO_16)
BINARY_INT_GVEC(PMULLD,  tcg_gen_gvec_mul, MO_32)
BINARY_INT_GVEC(POR,     tcg_gen_gvec_or, MO_64)
BINARY_INT_GVEC(PSUBB,   tcg_gen_gvec_sub, MO_8)
BINARY_INT_GVEC(PSUBW,   tcg_gen_gvec_sub, MO_16)
BINARY_INT_GVEC(PSUBD,   tcg_gen_gvec_sub, MO_32)
BINARY_INT_GVEC(PSUBQ,   tcg_gen_gvec_sub, MO_64)
BINARY_INT_GVEC(PSUBSB,  tcg_gen_gvec_sssub, MO_8)
BINARY_INT_GVEC(PSUBSW,  tcg_gen_gvec_sssub, MO_16)
BINARY_INT_GVEC(PSUBUSB, tcg_gen_gvec_ussub, MO_8)
BINARY_INT_GVEC(PSUBUSW, tcg_gen_gvec_ussub, MO_16)
BINARY_INT_GVEC(PXOR,    tcg_gen_gvec_xor, MO_64)
92ec056a PB |
746 | |
747 | ||
748 | /* | |
749 | * 00 = p* Pq, Qq (if mmx not NULL; no VEX) | |
750 | * 66 = vp* Vx, Hx, Wx | |
751 | * | |
752 | * These are really the same encoding, because 1) V is the same as P when VEX.V | |
753 | * is not present 2) P and Q are the same as H and W apart from MM/XMM | |
754 | */ | |
755 | static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
756 | SSEFunc_0_eppp mmx, SSEFunc_0_eppp xmm, SSEFunc_0_eppp ymm) | |
757 | { | |
758 | assert(!!mmx == !!(decode->e.special == X86_SPECIAL_MMX)); | |
759 | ||
760 | if (mmx && (s->prefix & PREFIX_VEX) && !(s->prefix & PREFIX_DATA)) { | |
761 | /* VEX encoding is not applicable to MMX instructions. */ | |
762 | gen_illegal_opcode(s); | |
763 | return; | |
764 | } | |
765 | if (!(s->prefix & PREFIX_DATA)) { | |
ad75a51e | 766 | mmx(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
92ec056a | 767 | } else if (!s->vex_l) { |
ad75a51e | 768 | xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
92ec056a | 769 | } else { |
ad75a51e | 770 | ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
92ec056a PB |
771 | } |
772 | } | |
773 | ||
774 | ||
/*
 * Define gen_<uname>() for an instruction that exists in MMX, XMM and YMM
 * widths, dispatching through gen_binary_int_sse() to
 * gen_helper_<lname>_{mmx,xmm,ymm}.
 */
#define BINARY_INT_MMX(uname, lname)                                               \
static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
{                                                                                  \
    gen_binary_int_sse(s, env, decode,                                             \
                          gen_helper_##lname##_mmx,                                \
                          gen_helper_##lname##_xmm,                                \
                          gen_helper_##lname##_ymm);                               \
}
BINARY_INT_MMX(PUNPCKLBW,  punpcklbw)
BINARY_INT_MMX(PUNPCKLWD,  punpcklwd)
BINARY_INT_MMX(PUNPCKLDQ,  punpckldq)
BINARY_INT_MMX(PACKSSWB,   packsswb)
BINARY_INT_MMX(PACKUSWB,   packuswb)
BINARY_INT_MMX(PUNPCKHBW,  punpckhbw)
BINARY_INT_MMX(PUNPCKHWD,  punpckhwd)
BINARY_INT_MMX(PUNPCKHDQ,  punpckhdq)
BINARY_INT_MMX(PACKSSDW,   packssdw)

BINARY_INT_MMX(PAVGB,   pavgb)
BINARY_INT_MMX(PAVGW,   pavgw)
BINARY_INT_MMX(PMADDWD, pmaddwd)
BINARY_INT_MMX(PMULHUW, pmulhuw)
BINARY_INT_MMX(PMULHW,  pmulhw)
BINARY_INT_MMX(PMULUDQ, pmuludq)
BINARY_INT_MMX(PSADBW,  psadbw)

/* Shifts whose count comes from a vector operand. */
BINARY_INT_MMX(PSLLW_r, psllw)
BINARY_INT_MMX(PSLLD_r, pslld)
BINARY_INT_MMX(PSLLQ_r, psllq)
BINARY_INT_MMX(PSRLW_r, psrlw)
BINARY_INT_MMX(PSRLD_r, psrld)
BINARY_INT_MMX(PSRLQ_r, psrlq)
BINARY_INT_MMX(PSRAW_r, psraw)
BINARY_INT_MMX(PSRAD_r, psrad)

BINARY_INT_MMX(PHADDW,    phaddw)
BINARY_INT_MMX(PHADDSW,   phaddsw)
BINARY_INT_MMX(PHADDD,    phaddd)
BINARY_INT_MMX(PHSUBW,    phsubw)
BINARY_INT_MMX(PHSUBSW,   phsubsw)
BINARY_INT_MMX(PHSUBD,    phsubd)
BINARY_INT_MMX(PMADDUBSW, pmaddubsw)
BINARY_INT_MMX(PSHUFB,    pshufb)
BINARY_INT_MMX(PSIGNB,    psignb)
BINARY_INT_MMX(PSIGNW,    psignw)
BINARY_INT_MMX(PSIGND,    psignd)
BINARY_INT_MMX(PMULHRSW,  pmulhrsw)
822 | ||
92ec056a PB |
823 | /* Instructions with no MMX equivalent. */ |
824 | #define BINARY_INT_SSE(uname, lname) \ | |
825 | static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ | |
826 | { \ | |
827 | gen_binary_int_sse(s, env, decode, \ | |
828 | NULL, \ | |
829 | gen_helper_##lname##_xmm, \ | |
830 | gen_helper_##lname##_ymm); \ | |
831 | } | |
832 | ||
16fc5726 | 833 | /* Instructions with no MMX equivalent. */ |
92ec056a PB |
834 | BINARY_INT_SSE(PUNPCKLQDQ, punpcklqdq) |
835 | BINARY_INT_SSE(PUNPCKHQDQ, punpckhqdq) | |
16fc5726 PB |
836 | BINARY_INT_SSE(VPACKUSDW, packusdw) |
837 | BINARY_INT_SSE(VPERMILPS, vpermilps) | |
838 | BINARY_INT_SSE(VPERMILPD, vpermilpd) | |
839 | BINARY_INT_SSE(VMASKMOVPS, vpmaskmovd) | |
840 | BINARY_INT_SSE(VMASKMOVPD, vpmaskmovq) | |
841 | ||
842 | BINARY_INT_SSE(PMULDQ, pmuldq) | |
843 | ||
844 | BINARY_INT_SSE(VAESDEC, aesdec) | |
845 | BINARY_INT_SSE(VAESDECLAST, aesdeclast) | |
846 | BINARY_INT_SSE(VAESENC, aesenc) | |
847 | BINARY_INT_SSE(VAESENCLAST, aesenclast) | |
848 | ||
849 | #define UNARY_CMP_SSE(uname, lname) \ | |
850 | static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ | |
851 | { \ | |
852 | if (!s->vex_l) { \ | |
ad75a51e | 853 | gen_helper_##lname##_xmm(tcg_env, OP_PTR1, OP_PTR2); \ |
16fc5726 | 854 | } else { \ |
ad75a51e | 855 | gen_helper_##lname##_ymm(tcg_env, OP_PTR1, OP_PTR2); \ |
16fc5726 PB |
856 | } \ |
857 | set_cc_op(s, CC_OP_EFLAGS); \ | |
858 | } | |
859 | UNARY_CMP_SSE(VPTEST, ptest) | |
860 | UNARY_CMP_SSE(VTESTPS, vtestps) | |
861 | UNARY_CMP_SSE(VTESTPD, vtestpd) | |
92ec056a | 862 | |
03b45880 PB |
863 | static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, |
864 | SSEFunc_0_epp xmm, SSEFunc_0_epp ymm) | |
865 | { | |
866 | if (!s->vex_l) { | |
ad75a51e | 867 | xmm(tcg_env, OP_PTR0, OP_PTR2); |
03b45880 | 868 | } else { |
ad75a51e | 869 | ymm(tcg_env, OP_PTR0, OP_PTR2); |
03b45880 PB |
870 | } |
871 | } | |
872 | ||
873 | #define UNARY_INT_SSE(uname, lname) \ | |
874 | static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ | |
875 | { \ | |
876 | gen_unary_int_sse(s, env, decode, \ | |
877 | gen_helper_##lname##_xmm, \ | |
878 | gen_helper_##lname##_ymm); \ | |
879 | } | |
880 | ||
16fc5726 PB |
881 | UNARY_INT_SSE(VPMOVSXBW, pmovsxbw) |
882 | UNARY_INT_SSE(VPMOVSXBD, pmovsxbd) | |
883 | UNARY_INT_SSE(VPMOVSXBQ, pmovsxbq) | |
884 | UNARY_INT_SSE(VPMOVSXWD, pmovsxwd) | |
885 | UNARY_INT_SSE(VPMOVSXWQ, pmovsxwq) | |
886 | UNARY_INT_SSE(VPMOVSXDQ, pmovsxdq) | |
887 | ||
888 | UNARY_INT_SSE(VPMOVZXBW, pmovzxbw) | |
889 | UNARY_INT_SSE(VPMOVZXBD, pmovzxbd) | |
890 | UNARY_INT_SSE(VPMOVZXBQ, pmovzxbq) | |
891 | UNARY_INT_SSE(VPMOVZXWD, pmovzxwd) | |
892 | UNARY_INT_SSE(VPMOVZXWQ, pmovzxwq) | |
893 | UNARY_INT_SSE(VPMOVZXDQ, pmovzxdq) | |
894 | ||
7170a17e PB |
895 | UNARY_INT_SSE(VMOVSLDUP, pmovsldup) |
896 | UNARY_INT_SSE(VMOVSHDUP, pmovshdup) | |
897 | UNARY_INT_SSE(VMOVDDUP, pmovdldup) | |
898 | ||
6bbeb98d PB |
899 | UNARY_INT_SSE(VCVTDQ2PD, cvtdq2pd) |
900 | UNARY_INT_SSE(VCVTPD2DQ, cvtpd2dq) | |
901 | UNARY_INT_SSE(VCVTTPD2DQ, cvttpd2dq) | |
03b45880 PB |
902 | UNARY_INT_SSE(VCVTDQ2PS, cvtdq2ps) |
903 | UNARY_INT_SSE(VCVTPS2DQ, cvtps2dq) | |
904 | UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq) | |
cf5ec664 | 905 | UNARY_INT_SSE(VCVTPH2PS, cvtph2ps) |
03b45880 PB |
906 | |
907 | ||
ce4fcb94 PB |
908 | static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, |
909 | SSEFunc_0_ppi xmm, SSEFunc_0_ppi ymm) | |
910 | { | |
911 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
912 | if (!s->vex_l) { | |
913 | xmm(OP_PTR0, OP_PTR1, imm); | |
914 | } else { | |
915 | ymm(OP_PTR0, OP_PTR1, imm); | |
916 | } | |
917 | } | |
918 | ||
919 | #define UNARY_IMM_SSE(uname, lname) \ | |
920 | static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ | |
921 | { \ | |
922 | gen_unary_imm_sse(s, env, decode, \ | |
923 | gen_helper_##lname##_xmm, \ | |
924 | gen_helper_##lname##_ymm); \ | |
925 | } | |
926 | ||
927 | UNARY_IMM_SSE(PSHUFD, pshufd) | |
928 | UNARY_IMM_SSE(PSHUFHW, pshufhw) | |
929 | UNARY_IMM_SSE(PSHUFLW, pshuflw) | |
79068477 PB |
930 | #define gen_helper_vpermq_xmm NULL |
931 | UNARY_IMM_SSE(VPERMQ, vpermq) | |
932 | UNARY_IMM_SSE(VPERMILPS_i, vpermilps_imm) | |
933 | UNARY_IMM_SSE(VPERMILPD_i, vpermilpd_imm) | |
934 | ||
935 | static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
936 | SSEFunc_0_eppi xmm, SSEFunc_0_eppi ymm) | |
937 | { | |
938 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
939 | if (!s->vex_l) { | |
ad75a51e | 940 | xmm(tcg_env, OP_PTR0, OP_PTR1, imm); |
79068477 | 941 | } else { |
ad75a51e | 942 | ymm(tcg_env, OP_PTR0, OP_PTR1, imm); |
79068477 PB |
943 | } |
944 | } | |
945 | ||
946 | #define UNARY_IMM_FP_SSE(uname, lname) \ | |
947 | static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ | |
948 | { \ | |
949 | gen_unary_imm_fp_sse(s, env, decode, \ | |
950 | gen_helper_##lname##_xmm, \ | |
951 | gen_helper_##lname##_ymm); \ | |
952 | } | |
953 | ||
954 | UNARY_IMM_FP_SSE(VROUNDPS, roundps) | |
955 | UNARY_IMM_FP_SSE(VROUNDPD, roundpd) | |
ce4fcb94 | 956 | |
16fc5726 PB |
957 | static inline void gen_vexw_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, |
958 | SSEFunc_0_eppp d_xmm, SSEFunc_0_eppp q_xmm, | |
959 | SSEFunc_0_eppp d_ymm, SSEFunc_0_eppp q_ymm) | |
960 | { | |
961 | SSEFunc_0_eppp d = s->vex_l ? d_ymm : d_xmm; | |
962 | SSEFunc_0_eppp q = s->vex_l ? q_ymm : q_xmm; | |
963 | SSEFunc_0_eppp fn = s->vex_w ? q : d; | |
ad75a51e | 964 | fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
16fc5726 PB |
965 | } |
966 | ||
967 | /* VEX.W affects whether to operate on 32- or 64-bit elements. */ | |
968 | #define VEXW_AVX(uname, lname) \ | |
969 | static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ | |
970 | { \ | |
971 | gen_vexw_avx(s, env, decode, \ | |
972 | gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \ | |
973 | gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \ | |
974 | } | |
975 | VEXW_AVX(VPSLLV, vpsllv) | |
976 | VEXW_AVX(VPSRLV, vpsrlv) | |
977 | VEXW_AVX(VPSRAV, vpsrav) | |
978 | VEXW_AVX(VPMASKMOV, vpmaskmov) | |
979 | ||
980 | /* Same as above, but with extra arguments to the helper. */ | |
981 | static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
982 | SSEFunc_0_epppti d_xmm, SSEFunc_0_epppti q_xmm, | |
983 | SSEFunc_0_epppti d_ymm, SSEFunc_0_epppti q_ymm) | |
984 | { | |
985 | SSEFunc_0_epppti d = s->vex_l ? d_ymm : d_xmm; | |
986 | SSEFunc_0_epppti q = s->vex_l ? q_ymm : q_xmm; | |
987 | SSEFunc_0_epppti fn = s->vex_w ? q : d; | |
988 | TCGv_i32 scale = tcg_constant_i32(decode->mem.scale); | |
989 | TCGv_ptr index = tcg_temp_new_ptr(); | |
990 | ||
991 | /* Pass third input as (index, base, scale) */ | |
ad75a51e RH |
992 | tcg_gen_addi_ptr(index, tcg_env, ZMM_OFFSET(decode->mem.index)); |
993 | fn(tcg_env, OP_PTR0, OP_PTR1, index, s->A0, scale); | |
16fc5726 PB |
994 | |
995 | /* | |
996 | * There are two output operands, so zero OP1's high 128 bits | |
997 | * in the VEX.128 case. | |
998 | */ | |
999 | if (!s->vex_l) { | |
1000 | int ymmh_ofs = vector_elem_offset(&decode->op[1], MO_128, 1); | |
1001 | tcg_gen_gvec_dup_imm(MO_64, ymmh_ofs, 16, 16, 0); | |
1002 | } | |
16fc5726 PB |
1003 | } |
1004 | #define VSIB_AVX(uname, lname) \ | |
1005 | static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ | |
1006 | { \ | |
1007 | gen_vsib_avx(s, env, decode, \ | |
1008 | gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \ | |
1009 | gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \ | |
1010 | } | |
1011 | VSIB_AVX(VPGATHERD, vpgatherd) | |
1012 | VSIB_AVX(VPGATHERQ, vpgatherq) | |
1013 | ||
1d0b9261 PB |
1014 | static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) |
1015 | { | |
60c7dd22 | 1016 | int opposite_cc_op; |
1d0b9261 PB |
1017 | TCGv carry_in = NULL; |
1018 | TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2); | |
1019 | TCGv zero; | |
1020 | ||
1021 | if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) { | |
1022 | /* Re-use the carry-out from a previous round. */ | |
1023 | carry_in = carry_out; | |
60c7dd22 PB |
1024 | } else { |
1025 | /* We don't have a carry-in, get it out of EFLAGS. */ | |
1d0b9261 PB |
1026 | if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { |
1027 | gen_compute_eflags(s); | |
1028 | } | |
1029 | carry_in = s->tmp0; | |
1030 | tcg_gen_extract_tl(carry_in, cpu_cc_src, | |
1031 | ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1); | |
1032 | } | |
1033 | ||
1034 | switch (ot) { | |
1035 | #ifdef TARGET_X86_64 | |
1036 | case MO_32: | |
1037 | /* If TL is 64-bit just do everything in 64-bit arithmetic. */ | |
6fbef942 RH |
1038 | tcg_gen_ext32u_tl(s->T0, s->T0); |
1039 | tcg_gen_ext32u_tl(s->T1, s->T1); | |
1d0b9261 PB |
1040 | tcg_gen_add_i64(s->T0, s->T0, s->T1); |
1041 | tcg_gen_add_i64(s->T0, s->T0, carry_in); | |
1042 | tcg_gen_shri_i64(carry_out, s->T0, 32); | |
1043 | break; | |
1044 | #endif | |
1045 | default: | |
1046 | zero = tcg_constant_tl(0); | |
1047 | tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero); | |
1048 | tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); | |
1049 | break; | |
1050 | } | |
60c7dd22 PB |
1051 | |
1052 | opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX; | |
1053 | if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) { | |
1054 | /* Merge with the carry-out from the opposite instruction. */ | |
1055 | set_cc_op(s, CC_OP_ADCOX); | |
1056 | } else { | |
1057 | set_cc_op(s, cc_op); | |
1058 | } | |
1d0b9261 PB |
1059 | } |
1060 | ||
1061 | static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1062 | { | |
1063 | gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX); | |
1064 | } | |
1065 | ||
1066 | static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1067 | { | |
1068 | gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX); | |
1069 | } | |
1070 | ||
1071 | static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1072 | { | |
1073 | MemOp ot = decode->op[0].ot; | |
1074 | ||
1075 | tcg_gen_andc_tl(s->T0, s->T1, s->T0); | |
1076 | gen_op_update1_cc(s); | |
1077 | set_cc_op(s, CC_OP_LOGICB + ot); | |
1078 | } | |
1079 | ||
1080 | static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1081 | { | |
1082 | MemOp ot = decode->op[0].ot; | |
b14c0098 RH |
1083 | TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); |
1084 | TCGv zero = tcg_constant_tl(0); | |
1085 | TCGv mone = tcg_constant_tl(-1); | |
1d0b9261 PB |
1086 | |
1087 | /* | |
1088 | * Extract START, and shift the operand. | |
1089 | * Shifts larger than operand size get zeros. | |
1090 | */ | |
1091 | tcg_gen_ext8u_tl(s->A0, s->T1); | |
b14c0098 RH |
1092 | if (TARGET_LONG_BITS == 64 && ot == MO_32) { |
1093 | tcg_gen_ext32u_tl(s->T0, s->T0); | |
1094 | } | |
1d0b9261 PB |
1095 | tcg_gen_shr_tl(s->T0, s->T0, s->A0); |
1096 | ||
1d0b9261 PB |
1097 | tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); |
1098 | ||
1099 | /* | |
b14c0098 RH |
1100 | * Extract the LEN into an inverse mask. Lengths larger than |
1101 | * operand size get all zeros, length 0 gets all ones. | |
1d0b9261 PB |
1102 | */ |
1103 | tcg_gen_extract_tl(s->A0, s->T1, 8, 8); | |
b14c0098 RH |
1104 | tcg_gen_shl_tl(s->T1, mone, s->A0); |
1105 | tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); | |
1106 | tcg_gen_andc_tl(s->T0, s->T0, s->T1); | |
1d0b9261 PB |
1107 | |
1108 | gen_op_update1_cc(s); | |
1109 | set_cc_op(s, CC_OP_LOGICB + ot); | |
1110 | } | |
1111 | ||
1112 | static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1113 | { | |
1114 | MemOp ot = decode->op[0].ot; | |
1115 | ||
99282098 | 1116 | tcg_gen_mov_tl(cpu_cc_src, s->T0); |
1d0b9261 PB |
1117 | tcg_gen_neg_tl(s->T1, s->T0); |
1118 | tcg_gen_and_tl(s->T0, s->T0, s->T1); | |
1119 | tcg_gen_mov_tl(cpu_cc_dst, s->T0); | |
1120 | set_cc_op(s, CC_OP_BMILGB + ot); | |
1121 | } | |
1122 | ||
1123 | static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1124 | { | |
1125 | MemOp ot = decode->op[0].ot; | |
1126 | ||
99282098 | 1127 | tcg_gen_mov_tl(cpu_cc_src, s->T0); |
1d0b9261 PB |
1128 | tcg_gen_subi_tl(s->T1, s->T0, 1); |
1129 | tcg_gen_xor_tl(s->T0, s->T0, s->T1); | |
1130 | tcg_gen_mov_tl(cpu_cc_dst, s->T0); | |
1131 | set_cc_op(s, CC_OP_BMILGB + ot); | |
1132 | } | |
1133 | ||
1134 | static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1135 | { | |
1136 | MemOp ot = decode->op[0].ot; | |
1137 | ||
99282098 | 1138 | tcg_gen_mov_tl(cpu_cc_src, s->T0); |
1d0b9261 PB |
1139 | tcg_gen_subi_tl(s->T1, s->T0, 1); |
1140 | tcg_gen_and_tl(s->T0, s->T0, s->T1); | |
1141 | tcg_gen_mov_tl(cpu_cc_dst, s->T0); | |
1142 | set_cc_op(s, CC_OP_BMILGB + ot); | |
1143 | } | |
1144 | ||
1145 | static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1146 | { | |
1147 | MemOp ot = decode->op[0].ot; | |
9ad2ba6e RH |
1148 | TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); |
1149 | TCGv zero = tcg_constant_tl(0); | |
1150 | TCGv mone = tcg_constant_tl(-1); | |
1d0b9261 | 1151 | |
9ad2ba6e | 1152 | tcg_gen_ext8u_tl(s->T1, s->T1); |
1d0b9261 PB |
1153 | |
1154 | /* | |
1155 | * Note that since we're using BMILG (in order to get O | |
1156 | * cleared) we need to store the inverse into C. | |
1157 | */ | |
9ad2ba6e | 1158 | tcg_gen_setcond_tl(TCG_COND_LEU, cpu_cc_src, s->T1, bound); |
1d0b9261 | 1159 | |
9ad2ba6e RH |
1160 | tcg_gen_shl_tl(s->A0, mone, s->T1); |
1161 | tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); | |
1d0b9261 PB |
1162 | tcg_gen_andc_tl(s->T0, s->T0, s->A0); |
1163 | ||
1164 | gen_op_update1_cc(s); | |
1165 | set_cc_op(s, CC_OP_BMILGB + ot); | |
1166 | } | |
1167 | ||
1168 | static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1169 | { | |
1170 | MemOp ot = decode->op[2].ot; | |
1171 | ||
1172 | tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); | |
1173 | gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); | |
1174 | } | |
1175 | ||
f8d19eec PB |
1176 | static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1177 | { | |
ad75a51e | 1178 | gen_helper_enter_mmx(tcg_env); |
f8d19eec | 1179 | if (s->prefix & PREFIX_DATA) { |
ad75a51e | 1180 | gen_helper_cvtpi2pd(tcg_env, OP_PTR0, OP_PTR2); |
f8d19eec | 1181 | } else { |
ad75a51e | 1182 | gen_helper_cvtpi2ps(tcg_env, OP_PTR0, OP_PTR2); |
f8d19eec PB |
1183 | } |
1184 | } | |
1185 | ||
1186 | static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1187 | { | |
ad75a51e | 1188 | gen_helper_enter_mmx(tcg_env); |
f8d19eec | 1189 | if (s->prefix & PREFIX_DATA) { |
ad75a51e | 1190 | gen_helper_cvtpd2pi(tcg_env, OP_PTR0, OP_PTR2); |
f8d19eec | 1191 | } else { |
ad75a51e | 1192 | gen_helper_cvtps2pi(tcg_env, OP_PTR0, OP_PTR2); |
f8d19eec PB |
1193 | } |
1194 | } | |
1195 | ||
1196 | static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1197 | { | |
ad75a51e | 1198 | gen_helper_enter_mmx(tcg_env); |
f8d19eec | 1199 | if (s->prefix & PREFIX_DATA) { |
ad75a51e | 1200 | gen_helper_cvttpd2pi(tcg_env, OP_PTR0, OP_PTR2); |
f8d19eec | 1201 | } else { |
ad75a51e | 1202 | gen_helper_cvttps2pi(tcg_env, OP_PTR0, OP_PTR2); |
f8d19eec PB |
1203 | } |
1204 | } | |
1205 | ||
ce4fcb94 PB |
1206 | static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1207 | { | |
ad75a51e | 1208 | gen_helper_emms(tcg_env); |
ce4fcb94 PB |
1209 | } |
1210 | ||
d1c1a422 PB |
1211 | static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1212 | { | |
1213 | TCGv_i32 length = tcg_constant_i32(decode->immediate & 63); | |
1214 | TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63); | |
1215 | ||
ad75a51e | 1216 | gen_helper_extrq_i(tcg_env, OP_PTR0, index, length); |
d1c1a422 PB |
1217 | } |
1218 | ||
1219 | static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1220 | { | |
ad75a51e | 1221 | gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2); |
d1c1a422 PB |
1222 | } |
1223 | ||
1224 | static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1225 | { | |
1226 | TCGv_i32 length = tcg_constant_i32(decode->immediate & 63); | |
1227 | TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63); | |
1228 | ||
ad75a51e | 1229 | gen_helper_insertq_i(tcg_env, OP_PTR0, OP_PTR1, index, length); |
d1c1a422 PB |
1230 | } |
1231 | ||
1232 | static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1233 | { | |
ad75a51e | 1234 | gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2); |
d1c1a422 PB |
1235 | } |
1236 | ||
57f6bba0 PB |
1237 | static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1238 | { | |
1239 | if (s->vex_l) { | |
1240 | gen_illegal_opcode(s); | |
1241 | return; | |
1242 | } | |
1243 | tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1); | |
ad75a51e | 1244 | gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); |
57f6bba0 PB |
1245 | } |
1246 | ||
6bbeb98d PB |
1247 | static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1248 | { | |
1249 | tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); | |
1250 | gen_extu(s->aflag, s->A0); | |
1251 | gen_add_A0_ds_seg(s); | |
1252 | ||
1253 | if (s->prefix & PREFIX_DATA) { | |
ad75a51e | 1254 | gen_helper_maskmov_xmm(tcg_env, OP_PTR1, OP_PTR2, s->A0); |
6bbeb98d | 1255 | } else { |
ad75a51e | 1256 | gen_helper_maskmov_mmx(tcg_env, OP_PTR1, OP_PTR2, s->A0); |
6bbeb98d PB |
1257 | } |
1258 | } | |
1259 | ||
1d0b9261 PB |
1260 | static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1261 | { | |
1262 | MemOp ot = decode->op[0].ot; | |
1263 | ||
1264 | /* M operand type does not load/store */ | |
1265 | if (decode->e.op0 == X86_TYPE_M) { | |
1266 | tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, ot | MO_BE); | |
1267 | } else { | |
1268 | tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, ot | MO_BE); | |
1269 | } | |
1270 | } | |
1271 | ||
d1c1a422 PB |
1272 | static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1273 | { | |
1274 | MemOp ot = decode->op[2].ot; | |
1275 | ||
1276 | switch (ot) { | |
1277 | case MO_32: | |
1278 | #ifdef TARGET_X86_64 | |
ad75a51e | 1279 | tcg_gen_ld32u_tl(s->T0, tcg_env, decode->op[2].offset); |
d1c1a422 PB |
1280 | break; |
1281 | case MO_64: | |
1282 | #endif | |
ad75a51e | 1283 | tcg_gen_ld_tl(s->T0, tcg_env, decode->op[2].offset); |
d1c1a422 PB |
1284 | break; |
1285 | default: | |
1286 | abort(); | |
1287 | } | |
1288 | } | |
1289 | ||
92ec056a PB |
1290 | static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1291 | { | |
1292 | MemOp ot = decode->op[2].ot; | |
1293 | int vec_len = vector_len(s, decode); | |
1294 | int lo_ofs = vector_elem_offset(&decode->op[0], ot, 0); | |
1295 | ||
1296 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
1297 | ||
1298 | switch (ot) { | |
1299 | case MO_32: | |
1300 | #ifdef TARGET_X86_64 | |
ad75a51e | 1301 | tcg_gen_st32_tl(s->T1, tcg_env, lo_ofs); |
92ec056a PB |
1302 | break; |
1303 | case MO_64: | |
1304 | #endif | |
ad75a51e | 1305 | tcg_gen_st_tl(s->T1, tcg_env, lo_ofs); |
92ec056a PB |
1306 | break; |
1307 | default: | |
1308 | g_assert_not_reached(); | |
1309 | } | |
1310 | } | |
1311 | ||
1312 | static void gen_MOVDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1313 | { | |
1314 | gen_store_sse(s, decode, decode->op[2].offset); | |
1315 | } | |
1316 | ||
03b45880 PB |
1317 | static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1318 | { | |
1319 | typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn; | |
1320 | ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm; | |
1321 | pd = s->vex_l ? gen_helper_movmskpd_ymm : gen_helper_movmskpd_xmm; | |
1322 | fn = s->prefix & PREFIX_DATA ? pd : ps; | |
ad75a51e | 1323 | fn(s->tmp2_i32, tcg_env, OP_PTR2); |
03b45880 PB |
1324 | tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); |
1325 | } | |
1326 | ||
d1c1a422 PB |
1327 | static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1328 | { | |
1329 | int vec_len = vector_len(s, decode); | |
1330 | int lo_ofs = vector_elem_offset(&decode->op[0], MO_64, 0); | |
1331 | ||
ad75a51e | 1332 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset); |
6bbeb98d PB |
1333 | if (decode->op[0].has_ea) { |
1334 | tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); | |
1335 | } else { | |
1336 | /* | |
1337 | * tcg_gen_gvec_dup_i64(MO_64, op0.offset, 8, vec_len, s->tmp1_64) would | |
1338 | * seem to work, but it does not on big-endian platforms; the cleared parts | |
1339 | * are always at higher addresses, but cross-endian emulation inverts the | |
1340 | * byte order so that the cleared parts need to be at *lower* addresses. | |
1341 | * Because oprsz is 8, we see this here even for SSE; but more in general, | |
1342 | * it disqualifies using oprsz < maxsz to emulate VEX128. | |
1343 | */ | |
1344 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
ad75a51e | 1345 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, lo_ofs); |
6bbeb98d PB |
1346 | } |
1347 | } | |
1348 | ||
1349 | static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1350 | { | |
ad75a51e | 1351 | gen_helper_enter_mmx(tcg_env); |
6bbeb98d PB |
1352 | /* Otherwise the same as any other movq. */ |
1353 | return gen_MOVQ(s, env, decode); | |
d1c1a422 PB |
1354 | } |
1355 | ||
1d0b9261 PB |
1356 | static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1357 | { | |
1358 | MemOp ot = decode->op[0].ot; | |
1359 | ||
1360 | /* low part of result in VEX.vvvv, high in MODRM */ | |
1361 | switch (ot) { | |
1362 | default: | |
1363 | tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); | |
1364 | tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); | |
1365 | tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, | |
1366 | s->tmp2_i32, s->tmp3_i32); | |
1367 | tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); | |
1368 | tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32); | |
1369 | break; | |
1370 | #ifdef TARGET_X86_64 | |
1371 | case MO_64: | |
1372 | tcg_gen_mulu2_i64(cpu_regs[s->vex_v], s->T0, s->T0, s->T1); | |
1373 | break; | |
1374 | #endif | |
1375 | } | |
1376 | ||
1377 | } | |
1378 | ||
79068477 PB |
1379 | static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1380 | { | |
1381 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
1382 | if (!(s->prefix & PREFIX_DATA)) { | |
ad75a51e | 1383 | gen_helper_palignr_mmx(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); |
79068477 | 1384 | } else if (!s->vex_l) { |
ad75a51e | 1385 | gen_helper_palignr_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); |
79068477 | 1386 | } else { |
ad75a51e | 1387 | gen_helper_palignr_ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); |
79068477 PB |
1388 | } |
1389 | } | |
1390 | ||
1d0efbdb PB |
1391 | static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1392 | { | |
1393 | int vec_len = vector_len(s, decode); | |
1394 | ||
1395 | /* Careful, operand order is reversed! */ | |
1396 | tcg_gen_gvec_andc(MO_64, | |
1397 | decode->op[0].offset, decode->op[2].offset, | |
1398 | decode->op[1].offset, vec_len, vec_len); | |
1399 | } | |
1400 | ||
79068477 PB |
1401 | static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1402 | { | |
1403 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
ad75a51e | 1404 | gen_helper_pcmpestri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); |
79068477 PB |
1405 | set_cc_op(s, CC_OP_EFLAGS); |
1406 | } | |
1407 | ||
1408 | static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1409 | { | |
1410 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
ad75a51e | 1411 | gen_helper_pcmpestrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); |
79068477 PB |
1412 | set_cc_op(s, CC_OP_EFLAGS); |
1413 | if ((s->prefix & PREFIX_VEX) && !s->vex_l) { | |
1414 | tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)), | |
1415 | 16, 16, 0); | |
1416 | } | |
1417 | } | |
1418 | ||
1419 | static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1420 | { | |
1421 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
ad75a51e | 1422 | gen_helper_pcmpistri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); |
79068477 PB |
1423 | set_cc_op(s, CC_OP_EFLAGS); |
1424 | } | |
1425 | ||
1426 | static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1427 | { | |
1428 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
ad75a51e | 1429 | gen_helper_pcmpistrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); |
79068477 PB |
1430 | set_cc_op(s, CC_OP_EFLAGS); |
1431 | if ((s->prefix & PREFIX_VEX) && !s->vex_l) { | |
1432 | tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)), | |
1433 | 16, 16, 0); | |
1434 | } | |
1435 | } | |
1436 | ||
1d0b9261 PB |
1437 | static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1438 | { | |
1439 | MemOp ot = decode->op[1].ot; | |
1440 | if (ot < MO_64) { | |
1441 | tcg_gen_ext32u_tl(s->T0, s->T0); | |
1442 | } | |
1443 | gen_helper_pdep(s->T0, s->T0, s->T1); | |
1444 | } | |
1445 | ||
1446 | static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1447 | { | |
1448 | MemOp ot = decode->op[1].ot; | |
1449 | if (ot < MO_64) { | |
1450 | tcg_gen_ext32u_tl(s->T0, s->T0); | |
1451 | } | |
1452 | gen_helper_pext(s->T0, s->T0, s->T1); | |
1453 | } | |
1454 | ||
79068477 PB |
1455 | static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot) |
1456 | { | |
1457 | int vec_len = vector_len(s, decode); | |
1458 | int mask = (vec_len >> ot) - 1; | |
1459 | int val = decode->immediate & mask; | |
1460 | ||
1461 | switch (ot) { | |
1462 | case MO_8: | |
ad75a51e | 1463 | tcg_gen_ld8u_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val)); |
79068477 PB |
1464 | break; |
1465 | case MO_16: | |
ad75a51e | 1466 | tcg_gen_ld16u_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val)); |
79068477 PB |
1467 | break; |
1468 | case MO_32: | |
1469 | #ifdef TARGET_X86_64 | |
ad75a51e | 1470 | tcg_gen_ld32u_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val)); |
79068477 PB |
1471 | break; |
1472 | case MO_64: | |
1473 | #endif | |
ad75a51e | 1474 | tcg_gen_ld_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val)); |
79068477 PB |
1475 | break; |
1476 | default: | |
1477 | abort(); | |
1478 | } | |
1479 | } | |
1480 | ||
1481 | static void gen_PEXTRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1482 | { | |
1483 | gen_pextr(s, env, decode, MO_8); | |
1484 | } | |
1485 | ||
1486 | static void gen_PEXTRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1487 | { | |
1488 | gen_pextr(s, env, decode, MO_16); | |
1489 | } | |
1490 | ||
1491 | static void gen_PEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1492 | { | |
1493 | MemOp ot = decode->op[0].ot; | |
1494 | gen_pextr(s, env, decode, ot); | |
1495 | } | |
1496 | ||
1497 | static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot) | |
1498 | { | |
1499 | int vec_len = vector_len(s, decode); | |
1500 | int mask = (vec_len >> ot) - 1; | |
1501 | int val = decode->immediate & mask; | |
1502 | ||
1503 | if (decode->op[1].offset != decode->op[0].offset) { | |
1504 | assert(vec_len == 16); | |
1505 | gen_store_sse(s, decode, decode->op[1].offset); | |
1506 | } | |
1507 | ||
1508 | switch (ot) { | |
1509 | case MO_8: | |
ad75a51e | 1510 | tcg_gen_st8_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val)); |
79068477 PB |
1511 | break; |
1512 | case MO_16: | |
ad75a51e | 1513 | tcg_gen_st16_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val)); |
79068477 PB |
1514 | break; |
1515 | case MO_32: | |
1516 | #ifdef TARGET_X86_64 | |
ad75a51e | 1517 | tcg_gen_st32_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val)); |
79068477 PB |
1518 | break; |
1519 | case MO_64: | |
1520 | #endif | |
ad75a51e | 1521 | tcg_gen_st_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val)); |
79068477 PB |
1522 | break; |
1523 | default: | |
1524 | abort(); | |
1525 | } | |
1526 | } | |
1527 | ||
1528 | static void gen_PINSRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1529 | { | |
1530 | gen_pinsr(s, env, decode, MO_8); | |
1531 | } | |
1532 | ||
aba2b8ec PB |
1533 | static void gen_PINSRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1534 | { | |
1535 | gen_pinsr(s, env, decode, MO_16); | |
1536 | } | |
1537 | ||
79068477 PB |
1538 | static void gen_PINSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1539 | { | |
1540 | gen_pinsr(s, env, decode, decode->op[2].ot); | |
1541 | } | |
1542 | ||
d4af67a2 RH |
1543 | static void gen_pmovmskb_i64(TCGv_i64 d, TCGv_i64 s) |
1544 | { | |
1545 | TCGv_i64 t = tcg_temp_new_i64(); | |
1546 | ||
1547 | tcg_gen_andi_i64(d, s, 0x8080808080808080ull); | |
1548 | ||
1549 | /* | |
1550 | * After each shift+or pair: | |
1551 | * 0: a.......b.......c.......d.......e.......f.......g.......h....... | |
1552 | * 7: ab......bc......cd......de......ef......fg......gh......h....... | |
1553 | * 14: abcd....bcde....cdef....defg....efgh....fgh.....gh......h....... | |
1554 | * 28: abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h....... | |
1555 | * The result is left in the high bits of the word. | |
1556 | */ | |
1557 | tcg_gen_shli_i64(t, d, 7); | |
1558 | tcg_gen_or_i64(d, d, t); | |
1559 | tcg_gen_shli_i64(t, d, 14); | |
1560 | tcg_gen_or_i64(d, d, t); | |
1561 | tcg_gen_shli_i64(t, d, 28); | |
1562 | tcg_gen_or_i64(d, d, t); | |
1563 | } | |
1564 | ||
1565 | static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s) | |
1566 | { | |
1567 | TCGv_vec t = tcg_temp_new_vec_matching(d); | |
1568 | TCGv_vec m = tcg_constant_vec_matching(d, MO_8, 0x80); | |
1569 | ||
1570 | /* See above */ | |
1571 | tcg_gen_and_vec(vece, d, s, m); | |
1572 | tcg_gen_shli_vec(vece, t, d, 7); | |
1573 | tcg_gen_or_vec(vece, d, d, t); | |
1574 | tcg_gen_shli_vec(vece, t, d, 14); | |
1575 | tcg_gen_or_vec(vece, d, d, t); | |
1576 | tcg_gen_shli_vec(vece, t, d, 28); | |
1577 | tcg_gen_or_vec(vece, d, d, t); | |
1578 | } | |
1579 | ||
/* Target-long alias for the host's extract2 capability flag. */
#ifndef TARGET_X86_64
#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32
#else
#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64
#endif
1585 | ||
6bbeb98d PB |
1586 | static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1587 | { | |
d4af67a2 RH |
1588 | static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; |
1589 | static const GVecGen2 g = { | |
1590 | .fni8 = gen_pmovmskb_i64, | |
1591 | .fniv = gen_pmovmskb_vec, | |
1592 | .opt_opc = vecop_list, | |
1593 | .vece = MO_64, | |
1594 | .prefer_i64 = TCG_TARGET_REG_BITS == 64 | |
1595 | }; | |
1596 | MemOp ot = decode->op[2].ot; | |
1597 | int vec_len = vector_len(s, decode); | |
1598 | TCGv t = tcg_temp_new(); | |
1599 | ||
1600 | tcg_gen_gvec_2(offsetof(CPUX86State, xmm_t0) + xmm_offset(ot), decode->op[2].offset, | |
1601 | vec_len, vec_len, &g); | |
ad75a51e | 1602 | tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); |
d4af67a2 RH |
1603 | while (vec_len > 8) { |
1604 | vec_len -= 8; | |
1605 | if (TCG_TARGET_HAS_extract2_tl) { | |
1606 | /* | |
1607 | * Load the next byte of the result into the high byte of T. | |
1608 | * TCG does a similar expansion of deposit to shl+extract2; by | |
1609 | * loading the whole word, the shift left is avoided. | |
1610 | */ | |
1611 | #ifdef TARGET_X86_64 | |
ad75a51e | 1612 | tcg_gen_ld_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_Q((vec_len - 1) / 8))); |
d4af67a2 | 1613 | #else |
ad75a51e | 1614 | tcg_gen_ld_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_L((vec_len - 1) / 4))); |
d4af67a2 RH |
1615 | #endif |
1616 | ||
1617 | tcg_gen_extract2_tl(s->T0, t, s->T0, TARGET_LONG_BITS - 8); | |
1618 | } else { | |
1619 | /* | |
1620 | * The _previous_ value is deposited into bits 8 and higher of t. Because | |
1621 | * those bits are known to be zero after ld8u, this becomes a shift+or | |
1622 | * if deposit is not available. | |
1623 | */ | |
ad75a51e | 1624 | tcg_gen_ld8u_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); |
d4af67a2 RH |
1625 | tcg_gen_deposit_tl(s->T0, t, s->T0, 8, TARGET_LONG_BITS - 8); |
1626 | } | |
6bbeb98d | 1627 | } |
6bbeb98d PB |
1628 | } |
1629 | ||
ce4fcb94 PB |
1630 | static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1631 | { | |
1632 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
1633 | gen_helper_pshufw_mmx(OP_PTR0, OP_PTR1, imm); | |
1634 | } | |
1635 | ||
1636 | static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1637 | { | |
1638 | int vec_len = vector_len(s, decode); | |
1639 | ||
1640 | if (decode->immediate >= 16) { | |
1641 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
1642 | } else { | |
1643 | tcg_gen_gvec_shri(MO_16, | |
1644 | decode->op[0].offset, decode->op[1].offset, | |
1645 | decode->immediate, vec_len, vec_len); | |
1646 | } | |
1647 | } | |
1648 | ||
1649 | static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1650 | { | |
1651 | int vec_len = vector_len(s, decode); | |
1652 | ||
1653 | if (decode->immediate >= 16) { | |
1654 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
1655 | } else { | |
1656 | tcg_gen_gvec_shli(MO_16, | |
1657 | decode->op[0].offset, decode->op[1].offset, | |
1658 | decode->immediate, vec_len, vec_len); | |
1659 | } | |
1660 | } | |
1661 | ||
1662 | static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1663 | { | |
1664 | int vec_len = vector_len(s, decode); | |
1665 | ||
1666 | if (decode->immediate >= 16) { | |
1667 | decode->immediate = 15; | |
1668 | } | |
1669 | tcg_gen_gvec_sari(MO_16, | |
1670 | decode->op[0].offset, decode->op[1].offset, | |
1671 | decode->immediate, vec_len, vec_len); | |
1672 | } | |
1673 | ||
1674 | static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1675 | { | |
1676 | int vec_len = vector_len(s, decode); | |
1677 | ||
1678 | if (decode->immediate >= 32) { | |
1679 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
1680 | } else { | |
1681 | tcg_gen_gvec_shri(MO_32, | |
1682 | decode->op[0].offset, decode->op[1].offset, | |
1683 | decode->immediate, vec_len, vec_len); | |
1684 | } | |
1685 | } | |
1686 | ||
1687 | static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1688 | { | |
1689 | int vec_len = vector_len(s, decode); | |
1690 | ||
1691 | if (decode->immediate >= 32) { | |
1692 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
1693 | } else { | |
1694 | tcg_gen_gvec_shli(MO_32, | |
1695 | decode->op[0].offset, decode->op[1].offset, | |
1696 | decode->immediate, vec_len, vec_len); | |
1697 | } | |
1698 | } | |
1699 | ||
1700 | static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1701 | { | |
1702 | int vec_len = vector_len(s, decode); | |
1703 | ||
1704 | if (decode->immediate >= 32) { | |
1705 | decode->immediate = 31; | |
1706 | } | |
1707 | tcg_gen_gvec_sari(MO_32, | |
1708 | decode->op[0].offset, decode->op[1].offset, | |
1709 | decode->immediate, vec_len, vec_len); | |
1710 | } | |
1711 | ||
1712 | static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1713 | { | |
1714 | int vec_len = vector_len(s, decode); | |
1715 | ||
1716 | if (decode->immediate >= 64) { | |
1717 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
1718 | } else { | |
1719 | tcg_gen_gvec_shri(MO_64, | |
1720 | decode->op[0].offset, decode->op[1].offset, | |
1721 | decode->immediate, vec_len, vec_len); | |
1722 | } | |
1723 | } | |
1724 | ||
1725 | static void gen_PSLLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1726 | { | |
1727 | int vec_len = vector_len(s, decode); | |
1728 | ||
1729 | if (decode->immediate >= 64) { | |
1730 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
1731 | } else { | |
1732 | tcg_gen_gvec_shli(MO_64, | |
1733 | decode->op[0].offset, decode->op[1].offset, | |
1734 | decode->immediate, vec_len, vec_len); | |
1735 | } | |
1736 | } | |
1737 | ||
1738 | static TCGv_ptr make_imm8u_xmm_vec(uint8_t imm, int vec_len) | |
1739 | { | |
1740 | MemOp ot = vec_len == 16 ? MO_128 : MO_256; | |
1741 | TCGv_i32 imm_v = tcg_constant8u_i32(imm); | |
1742 | TCGv_ptr ptr = tcg_temp_new_ptr(); | |
1743 | ||
1744 | tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_t0) + xmm_offset(ot), | |
1745 | vec_len, vec_len, 0); | |
1746 | ||
ad75a51e RH |
1747 | tcg_gen_addi_ptr(ptr, tcg_env, offsetof(CPUX86State, xmm_t0)); |
1748 | tcg_gen_st_i32(imm_v, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_L(0))); | |
ce4fcb94 PB |
1749 | return ptr; |
1750 | } | |
1751 | ||
1752 | static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1753 | { | |
1754 | int vec_len = vector_len(s, decode); | |
1755 | TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len); | |
1756 | ||
1757 | if (s->vex_l) { | |
ad75a51e | 1758 | gen_helper_psrldq_ymm(tcg_env, OP_PTR0, OP_PTR1, imm_vec); |
ce4fcb94 | 1759 | } else { |
ad75a51e | 1760 | gen_helper_psrldq_xmm(tcg_env, OP_PTR0, OP_PTR1, imm_vec); |
ce4fcb94 | 1761 | } |
ce4fcb94 PB |
1762 | } |
1763 | ||
1764 | static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1765 | { | |
1766 | int vec_len = vector_len(s, decode); | |
1767 | TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len); | |
1768 | ||
1769 | if (s->vex_l) { | |
ad75a51e | 1770 | gen_helper_pslldq_ymm(tcg_env, OP_PTR0, OP_PTR1, imm_vec); |
ce4fcb94 | 1771 | } else { |
ad75a51e | 1772 | gen_helper_pslldq_xmm(tcg_env, OP_PTR0, OP_PTR1, imm_vec); |
ce4fcb94 | 1773 | } |
ce4fcb94 PB |
1774 | } |
1775 | ||
1d0b9261 PB |
1776 | static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1777 | { | |
1778 | MemOp ot = decode->op[0].ot; | |
1779 | int b = decode->immediate; | |
1780 | ||
1781 | if (ot == MO_64) { | |
1782 | tcg_gen_rotri_tl(s->T0, s->T0, b & 63); | |
1783 | } else { | |
1784 | tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); | |
1785 | tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31); | |
1786 | tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); | |
1787 | } | |
1788 | } | |
1789 | ||
1790 | static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1791 | { | |
1792 | MemOp ot = decode->op[0].ot; | |
1793 | int mask; | |
1794 | ||
1795 | mask = ot == MO_64 ? 63 : 31; | |
1796 | tcg_gen_andi_tl(s->T1, s->T1, mask); | |
1797 | if (ot != MO_64) { | |
1798 | tcg_gen_ext32s_tl(s->T0, s->T0); | |
1799 | } | |
1800 | tcg_gen_sar_tl(s->T0, s->T0, s->T1); | |
1801 | } | |
1802 | ||
e582b629 PB |
1803 | static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1804 | { | |
1805 | gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2); | |
1806 | } | |
1807 | ||
1808 | static void gen_SHA1MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1809 | { | |
1810 | gen_helper_sha1msg1(OP_PTR0, OP_PTR1, OP_PTR2); | |
1811 | } | |
1812 | ||
1813 | static void gen_SHA1MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1814 | { | |
1815 | gen_helper_sha1msg2(OP_PTR0, OP_PTR1, OP_PTR2); | |
1816 | } | |
1817 | ||
1818 | static void gen_SHA1RNDS4(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1819 | { | |
1820 | switch(decode->immediate & 3) { | |
1821 | case 0: | |
1822 | gen_helper_sha1rnds4_f0(OP_PTR0, OP_PTR0, OP_PTR1); | |
1823 | break; | |
1824 | case 1: | |
1825 | gen_helper_sha1rnds4_f1(OP_PTR0, OP_PTR0, OP_PTR1); | |
1826 | break; | |
1827 | case 2: | |
1828 | gen_helper_sha1rnds4_f2(OP_PTR0, OP_PTR0, OP_PTR1); | |
1829 | break; | |
1830 | case 3: | |
1831 | gen_helper_sha1rnds4_f3(OP_PTR0, OP_PTR0, OP_PTR1); | |
1832 | break; | |
1833 | } | |
1834 | } | |
1835 | ||
1836 | static void gen_SHA256MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1837 | { | |
1838 | gen_helper_sha256msg1(OP_PTR0, OP_PTR1, OP_PTR2); | |
1839 | } | |
1840 | ||
1841 | static void gen_SHA256MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1842 | { | |
1843 | gen_helper_sha256msg2(OP_PTR0, OP_PTR1, OP_PTR2); | |
1844 | } | |
1845 | ||
1846 | static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1847 | { | |
1848 | TCGv_i32 wk0 = tcg_temp_new_i32(); | |
1849 | TCGv_i32 wk1 = tcg_temp_new_i32(); | |
1850 | ||
1851 | tcg_gen_ld_i32(wk0, tcg_env, ZMM_OFFSET(0) + offsetof(ZMMReg, ZMM_L(0))); | |
1852 | tcg_gen_ld_i32(wk1, tcg_env, ZMM_OFFSET(0) + offsetof(ZMMReg, ZMM_L(1))); | |
1853 | ||
1854 | gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1); | |
1855 | } | |
1856 | ||
1d0b9261 PB |
1857 | static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1858 | { | |
1859 | MemOp ot = decode->op[0].ot; | |
1860 | int mask; | |
1861 | ||
1862 | mask = ot == MO_64 ? 63 : 31; | |
1863 | tcg_gen_andi_tl(s->T1, s->T1, mask); | |
1864 | tcg_gen_shl_tl(s->T0, s->T0, s->T1); | |
1865 | } | |
1866 | ||
1867 | static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1868 | { | |
1869 | MemOp ot = decode->op[0].ot; | |
1870 | int mask; | |
1871 | ||
1872 | mask = ot == MO_64 ? 63 : 31; | |
1873 | tcg_gen_andi_tl(s->T1, s->T1, mask); | |
1874 | if (ot != MO_64) { | |
1875 | tcg_gen_ext32u_tl(s->T0, s->T0); | |
1876 | } | |
1877 | tcg_gen_shr_tl(s->T0, s->T0, s->T1); | |
1878 | } | |
03b45880 | 1879 | |
79068477 PB |
1880 | static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1881 | { | |
1882 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
1883 | assert(!s->vex_l); | |
ad75a51e | 1884 | gen_helper_aeskeygenassist_xmm(tcg_env, OP_PTR0, OP_PTR1, imm); |
79068477 PB |
1885 | } |
1886 | ||
57f6bba0 PB |
1887 | static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1888 | { | |
1889 | if (s->vex_l) { | |
1890 | gen_illegal_opcode(s); | |
1891 | return; | |
1892 | } | |
ad75a51e RH |
1893 | gen_helper_update_mxcsr(tcg_env); |
1894 | tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr)); | |
57f6bba0 PB |
1895 | } |
1896 | ||
16fc5726 PB |
1897 | static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1898 | { | |
1899 | assert(!s->vex_l); | |
ad75a51e | 1900 | gen_helper_aesimc_xmm(tcg_env, OP_PTR0, OP_PTR2); |
16fc5726 PB |
1901 | } |
1902 | ||
aba2b8ec PB |
1903 | /* |
1904 | * 00 = v*ps Vps, Hps, Wps | |
1905 | * 66 = v*pd Vpd, Hpd, Wpd | |
1906 | * f3 = v*ss Vss, Hss, Wss | |
1907 | * f2 = v*sd Vsd, Hsd, Wsd | |
1908 | */ | |
1909 | #define SSE_CMP(x) { \ | |
1910 | gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \ | |
1911 | gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, \ | |
1912 | gen_helper_ ## x ## ps ## _ymm, gen_helper_ ## x ## pd ## _ymm} | |
1913 | static const SSEFunc_0_eppp gen_helper_cmp_funcs[32][6] = { | |
1914 | SSE_CMP(cmpeq), | |
1915 | SSE_CMP(cmplt), | |
1916 | SSE_CMP(cmple), | |
1917 | SSE_CMP(cmpunord), | |
1918 | SSE_CMP(cmpneq), | |
1919 | SSE_CMP(cmpnlt), | |
1920 | SSE_CMP(cmpnle), | |
1921 | SSE_CMP(cmpord), | |
1922 | ||
1923 | SSE_CMP(cmpequ), | |
1924 | SSE_CMP(cmpnge), | |
1925 | SSE_CMP(cmpngt), | |
1926 | SSE_CMP(cmpfalse), | |
1927 | SSE_CMP(cmpnequ), | |
1928 | SSE_CMP(cmpge), | |
1929 | SSE_CMP(cmpgt), | |
1930 | SSE_CMP(cmptrue), | |
1931 | ||
1932 | SSE_CMP(cmpeqs), | |
1933 | SSE_CMP(cmpltq), | |
1934 | SSE_CMP(cmpleq), | |
1935 | SSE_CMP(cmpunords), | |
1936 | SSE_CMP(cmpneqq), | |
1937 | SSE_CMP(cmpnltq), | |
1938 | SSE_CMP(cmpnleq), | |
1939 | SSE_CMP(cmpords), | |
1940 | ||
1941 | SSE_CMP(cmpequs), | |
1942 | SSE_CMP(cmpngeq), | |
1943 | SSE_CMP(cmpngtq), | |
1944 | SSE_CMP(cmpfalses), | |
1945 | SSE_CMP(cmpnequs), | |
1946 | SSE_CMP(cmpgeq), | |
1947 | SSE_CMP(cmpgtq), | |
1948 | SSE_CMP(cmptrues), | |
1949 | }; | |
1950 | #undef SSE_CMP | |
1951 | ||
1952 | static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1953 | { | |
1954 | int index = decode->immediate & (s->prefix & PREFIX_VEX ? 31 : 7); | |
1955 | int b = | |
1956 | s->prefix & PREFIX_REPZ ? 2 /* ss */ : | |
1957 | s->prefix & PREFIX_REPNZ ? 3 /* sd */ : | |
1958 | !!(s->prefix & PREFIX_DATA) /* pd */ + (s->vex_l << 2); | |
1959 | ||
ad75a51e | 1960 | gen_helper_cmp_funcs[index][b](tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
aba2b8ec PB |
1961 | } |
1962 | ||
f8d19eec PB |
1963 | static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1964 | { | |
1965 | SSEFunc_0_epp fn; | |
1966 | fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss; | |
ad75a51e | 1967 | fn(tcg_env, OP_PTR1, OP_PTR2); |
f8d19eec PB |
1968 | set_cc_op(s, CC_OP_EFLAGS); |
1969 | } | |
1970 | ||
abd41884 | 1971 | static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
03b45880 | 1972 | { |
abd41884 | 1973 | if (s->vex_l) { |
ad75a51e | 1974 | gen_helper_cvtpd2ps_ymm(tcg_env, OP_PTR0, OP_PTR2); |
abd41884 | 1975 | } else { |
ad75a51e | 1976 | gen_helper_cvtpd2ps_xmm(tcg_env, OP_PTR0, OP_PTR2); |
abd41884 PB |
1977 | } |
1978 | } | |
1979 | ||
1980 | static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
1981 | { | |
1982 | if (s->vex_l) { | |
ad75a51e | 1983 | gen_helper_cvtps2pd_ymm(tcg_env, OP_PTR0, OP_PTR2); |
abd41884 | 1984 | } else { |
ad75a51e | 1985 | gen_helper_cvtps2pd_xmm(tcg_env, OP_PTR0, OP_PTR2); |
abd41884 | 1986 | } |
03b45880 | 1987 | } |
ce4fcb94 | 1988 | |
cf5ec664 PB |
1989 | static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
1990 | { | |
1991 | gen_unary_imm_fp_sse(s, env, decode, | |
1992 | gen_helper_cvtps2ph_xmm, | |
1993 | gen_helper_cvtps2ph_ymm); | |
1994 | /* | |
1995 | * VCVTPS2PH is the only instruction that performs an operation on a | |
1996 | * register source and then *stores* into memory. | |
1997 | */ | |
1998 | if (decode->op[0].has_ea) { | |
1999 | gen_store_sse(s, decode, decode->op[0].offset); | |
2000 | } | |
2001 | } | |
2002 | ||
abd41884 PB |
2003 | static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2004 | { | |
ad75a51e | 2005 | gen_helper_cvtsd2ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
abd41884 PB |
2006 | } |
2007 | ||
2008 | static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2009 | { | |
ad75a51e | 2010 | gen_helper_cvtss2sd(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); |
abd41884 PB |
2011 | } |
2012 | ||
f8d19eec PB |
2013 | static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2014 | { | |
2015 | int vec_len = vector_len(s, decode); | |
2016 | TCGv_i32 in; | |
2017 | ||
2018 | tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); | |
2019 | ||
2020 | #ifdef TARGET_X86_64 | |
2021 | MemOp ot = decode->op[2].ot; | |
2022 | if (ot == MO_64) { | |
2023 | if (s->prefix & PREFIX_REPNZ) { | |
ad75a51e | 2024 | gen_helper_cvtsq2sd(tcg_env, OP_PTR0, s->T1); |
f8d19eec | 2025 | } else { |
ad75a51e | 2026 | gen_helper_cvtsq2ss(tcg_env, OP_PTR0, s->T1); |
f8d19eec PB |
2027 | } |
2028 | return; | |
2029 | } | |
2030 | in = s->tmp2_i32; | |
2031 | tcg_gen_trunc_tl_i32(in, s->T1); | |
2032 | #else | |
2033 | in = s->T1; | |
2034 | #endif | |
2035 | ||
2036 | if (s->prefix & PREFIX_REPNZ) { | |
ad75a51e | 2037 | gen_helper_cvtsi2sd(tcg_env, OP_PTR0, in); |
f8d19eec | 2038 | } else { |
ad75a51e | 2039 | gen_helper_cvtsi2ss(tcg_env, OP_PTR0, in); |
f8d19eec PB |
2040 | } |
2041 | } | |
2042 | ||
2043 | static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, | |
2044 | SSEFunc_i_ep ss2si, SSEFunc_l_ep ss2sq, | |
2045 | SSEFunc_i_ep sd2si, SSEFunc_l_ep sd2sq) | |
2046 | { | |
2047 | TCGv_i32 out; | |
2048 | ||
2049 | #ifdef TARGET_X86_64 | |
2050 | MemOp ot = decode->op[0].ot; | |
2051 | if (ot == MO_64) { | |
2052 | if (s->prefix & PREFIX_REPNZ) { | |
ad75a51e | 2053 | sd2sq(s->T0, tcg_env, OP_PTR2); |
f8d19eec | 2054 | } else { |
ad75a51e | 2055 | ss2sq(s->T0, tcg_env, OP_PTR2); |
f8d19eec PB |
2056 | } |
2057 | return; | |
2058 | } | |
2059 | ||
2060 | out = s->tmp2_i32; | |
2061 | #else | |
2062 | out = s->T0; | |
2063 | #endif | |
2064 | if (s->prefix & PREFIX_REPNZ) { | |
ad75a51e | 2065 | sd2si(out, tcg_env, OP_PTR2); |
f8d19eec | 2066 | } else { |
ad75a51e | 2067 | ss2si(out, tcg_env, OP_PTR2); |
f8d19eec PB |
2068 | } |
2069 | #ifdef TARGET_X86_64 | |
2070 | tcg_gen_extu_i32_tl(s->T0, out); | |
2071 | #endif | |
2072 | } | |
2073 | ||
/*
 * Outside TARGET_X86_64 builds the 64-bit result helper names are defined
 * as NULL so they can still be passed as arguments.
 */
#if !defined(TARGET_X86_64)
#define gen_helper_cvtss2sq NULL
#define gen_helper_cvtsd2sq NULL
#define gen_helper_cvttss2sq NULL
#define gen_helper_cvttsd2sq NULL
#endif
2080 | ||
2081 | static void gen_VCVTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2082 | { | |
2083 | gen_VCVTtSx2SI(s, env, decode, | |
2084 | gen_helper_cvtss2si, gen_helper_cvtss2sq, | |
2085 | gen_helper_cvtsd2si, gen_helper_cvtsd2sq); | |
2086 | } | |
2087 | ||
2088 | static void gen_VCVTTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2089 | { | |
2090 | gen_VCVTtSx2SI(s, env, decode, | |
2091 | gen_helper_cvttss2si, gen_helper_cvttss2sq, | |
2092 | gen_helper_cvttsd2si, gen_helper_cvttsd2sq); | |
2093 | } | |
2094 | ||
79068477 PB |
2095 | static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2096 | { | |
2097 | int mask = decode->immediate & 1; | |
2098 | int src_ofs = vector_elem_offset(&decode->op[1], MO_128, mask); | |
2099 | if (decode->op[0].has_ea) { | |
2100 | /* VEX-only instruction, no alignment requirements. */ | |
2101 | gen_sto_env_A0(s, src_ofs, false); | |
2102 | } else { | |
2103 | tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, 16, 16); | |
2104 | } | |
2105 | } | |
2106 | ||
2107 | static void gen_VEXTRACTPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2108 | { | |
2109 | gen_pextr(s, env, decode, MO_32); | |
2110 | } | |
2111 | ||
2112 | static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2113 | { | |
2114 | int val = decode->immediate; | |
2115 | int dest_word = (val >> 4) & 3; | |
2116 | int new_mask = (val & 15) | (1 << dest_word); | |
2117 | int vec_len = 16; | |
2118 | ||
2119 | assert(!s->vex_l); | |
2120 | ||
2121 | if (new_mask == 15) { | |
2122 | /* All zeroes except possibly for the inserted element */ | |
2123 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
2124 | } else if (decode->op[1].offset != decode->op[0].offset) { | |
2125 | gen_store_sse(s, decode, decode->op[1].offset); | |
2126 | } | |
2127 | ||
2128 | if (new_mask != (val & 15)) { | |
ad75a51e | 2129 | tcg_gen_st_i32(s->tmp2_i32, tcg_env, |
79068477 PB |
2130 | vector_elem_offset(&decode->op[0], MO_32, dest_word)); |
2131 | } | |
2132 | ||
2133 | if (new_mask != 15) { | |
2134 | TCGv_i32 zero = tcg_constant_i32(0); /* float32_zero */ | |
2135 | int i; | |
2136 | for (i = 0; i < 4; i++) { | |
2137 | if ((val >> i) & 1) { | |
ad75a51e | 2138 | tcg_gen_st_i32(zero, tcg_env, |
79068477 PB |
2139 | vector_elem_offset(&decode->op[0], MO_32, i)); |
2140 | } | |
2141 | } | |
2142 | } | |
2143 | } | |
2144 | ||
2145 | static void gen_VINSERTPS_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2146 | { | |
2147 | int val = decode->immediate; | |
ad75a51e | 2148 | tcg_gen_ld_i32(s->tmp2_i32, tcg_env, |
79068477 PB |
2149 | vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3)); |
2150 | gen_vinsertps(s, env, decode); | |
2151 | } | |
2152 | ||
2153 | static void gen_VINSERTPS_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2154 | { | |
2155 | tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); | |
2156 | gen_vinsertps(s, env, decode); | |
2157 | } | |
2158 | ||
2159 | static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2160 | { | |
2161 | int mask = decode->immediate & 1; | |
2162 | tcg_gen_gvec_mov(MO_64, | |
2163 | decode->op[0].offset + offsetof(YMMReg, YMM_X(mask)), | |
2164 | decode->op[2].offset + offsetof(YMMReg, YMM_X(0)), 16, 16); | |
2165 | tcg_gen_gvec_mov(MO_64, | |
2166 | decode->op[0].offset + offsetof(YMMReg, YMM_X(!mask)), | |
2167 | decode->op[1].offset + offsetof(YMMReg, YMM_X(!mask)), 16, 16); | |
2168 | } | |
2169 | ||
16fc5726 PB |
2170 | static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, |
2171 | SSEFunc_0_eppt xmm, SSEFunc_0_eppt ymm) | |
2172 | { | |
2173 | if (!s->vex_l) { | |
ad75a51e | 2174 | xmm(tcg_env, OP_PTR2, OP_PTR1, s->A0); |
16fc5726 | 2175 | } else { |
ad75a51e | 2176 | ymm(tcg_env, OP_PTR2, OP_PTR1, s->A0); |
16fc5726 PB |
2177 | } |
2178 | } | |
2179 | ||
2180 | static void gen_VMASKMOVPD_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2181 | { | |
2182 | gen_maskmov(s, env, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm); | |
2183 | } | |
2184 | ||
2185 | static void gen_VMASKMOVPS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2186 | { | |
2187 | gen_maskmov(s, env, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm); | |
2188 | } | |
2189 | ||
7170a17e PB |
2190 | static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2191 | { | |
2192 | gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); | |
2193 | if (decode->op[0].offset != decode->op[1].offset) { | |
ad75a51e RH |
2194 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0))); |
2195 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); | |
7170a17e PB |
2196 | } |
2197 | } | |
2198 | ||
2199 | static void gen_VMOVHPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2200 | { | |
2201 | gen_stq_env_A0(s, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); | |
2202 | } | |
2203 | ||
2204 | static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2205 | { | |
2206 | if (decode->op[0].offset != decode->op[2].offset) { | |
ad75a51e RH |
2207 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); |
2208 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); | |
7170a17e PB |
2209 | } |
2210 | if (decode->op[0].offset != decode->op[1].offset) { | |
ad75a51e RH |
2211 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0))); |
2212 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); | |
7170a17e PB |
2213 | } |
2214 | } | |
2215 | ||
2216 | static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2217 | { | |
ad75a51e RH |
2218 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); |
2219 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); | |
7170a17e | 2220 | if (decode->op[0].offset != decode->op[1].offset) { |
ad75a51e RH |
2221 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(1))); |
2222 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); | |
7170a17e PB |
2223 | } |
2224 | } | |
2225 | ||
2226 | static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2227 | { | |
ad75a51e RH |
2228 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset); |
2229 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); | |
7170a17e | 2230 | if (decode->op[0].offset != decode->op[1].offset) { |
ad75a51e RH |
2231 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0))); |
2232 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); | |
7170a17e PB |
2233 | } |
2234 | } | |
2235 | ||
2236 | /* | |
2237 | * Note that MOVLPx supports 256-bit operation unlike MOVHLPx, MOVLHPx, MOVHPx. | |
2238 | * Use a gvec move to move everything above the bottom 64 bits. | |
2239 | */ | |
2240 | ||
2241 | static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2242 | { | |
2243 | int vec_len = vector_len(s, decode); | |
2244 | ||
ad75a51e | 2245 | tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(0))); |
7170a17e | 2246 | tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); |
ad75a51e | 2247 | tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); |
7170a17e PB |
2248 | } |
2249 | ||
2250 | static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2251 | { | |
2252 | int vec_len = vector_len(s, decode); | |
2253 | ||
2254 | tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); | |
2255 | tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); | |
2256 | tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0))); | |
2257 | } | |
2258 | ||
2259 | static void gen_VMOVLPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2260 | { | |
2261 | tcg_gen_ld_i64(s->tmp1_i64, OP_PTR2, offsetof(ZMMReg, ZMM_Q(0))); | |
2262 | tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); | |
2263 | } | |
2264 | ||
2265 | static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2266 | { | |
2267 | TCGv_i64 zero = tcg_constant_i64(0); | |
2268 | ||
2269 | tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); | |
2270 | tcg_gen_st_i64(zero, OP_PTR0, offsetof(ZMMReg, ZMM_Q(1))); | |
2271 | tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0))); | |
2272 | } | |
2273 | ||
2274 | static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2275 | { | |
2276 | int vec_len = vector_len(s, decode); | |
2277 | ||
2278 | tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); | |
2279 | tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); | |
2280 | tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); | |
2281 | } | |
2282 | ||
2283 | static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2284 | { | |
2285 | int vec_len = vector_len(s, decode); | |
2286 | ||
2287 | tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); | |
2288 | tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); | |
2289 | tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); | |
2290 | } | |
2291 | ||
2292 | static void gen_VMOVSS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2293 | { | |
2294 | tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); | |
2295 | tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); | |
2296 | } | |
2297 | ||
16fc5726 PB |
2298 | static void gen_VPMASKMOV_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2299 | { | |
2300 | if (s->vex_w) { | |
2301 | gen_VMASKMOVPD_st(s, env, decode); | |
2302 | } else { | |
2303 | gen_VMASKMOVPS_st(s, env, decode); | |
2304 | } | |
2305 | } | |
2306 | ||
2307 | static void gen_VPERMD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2308 | { | |
2309 | assert(s->vex_l); | |
2310 | gen_helper_vpermd_ymm(OP_PTR0, OP_PTR1, OP_PTR2); | |
2311 | } | |
2312 | ||
79068477 PB |
2313 | static void gen_VPERM2x128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2314 | { | |
2315 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
2316 | assert(s->vex_l); | |
2317 | gen_helper_vpermdq_ymm(OP_PTR0, OP_PTR1, OP_PTR2, imm); | |
2318 | } | |
2319 | ||
16fc5726 PB |
2320 | static void gen_VPHMINPOSUW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2321 | { | |
2322 | assert(!s->vex_l); | |
ad75a51e | 2323 | gen_helper_phminposuw_xmm(tcg_env, OP_PTR0, OP_PTR2); |
16fc5726 PB |
2324 | } |
2325 | ||
79068477 PB |
2326 | static void gen_VROUNDSD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2327 | { | |
2328 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
2329 | assert(!s->vex_l); | |
ad75a51e | 2330 | gen_helper_roundsd_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); |
79068477 PB |
2331 | } |
2332 | ||
2333 | static void gen_VROUNDSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2334 | { | |
2335 | TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); | |
2336 | assert(!s->vex_l); | |
ad75a51e | 2337 | gen_helper_roundss_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); |
79068477 PB |
2338 | } |
2339 | ||
aba2b8ec PB |
2340 | static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2341 | { | |
2342 | TCGv_i32 imm = tcg_constant_i32(decode->immediate); | |
2343 | SSEFunc_0_pppi ps, pd, fn; | |
2344 | ps = s->vex_l ? gen_helper_shufps_ymm : gen_helper_shufps_xmm; | |
2345 | pd = s->vex_l ? gen_helper_shufpd_ymm : gen_helper_shufpd_xmm; | |
2346 | fn = s->prefix & PREFIX_DATA ? pd : ps; | |
2347 | fn(OP_PTR0, OP_PTR1, OP_PTR2, imm); | |
2348 | } | |
2349 | ||
f8d19eec PB |
2350 | static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2351 | { | |
2352 | SSEFunc_0_epp fn; | |
2353 | fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss; | |
ad75a51e | 2354 | fn(tcg_env, OP_PTR1, OP_PTR2); |
f8d19eec PB |
2355 | set_cc_op(s, CC_OP_EFLAGS); |
2356 | } | |
2357 | ||
ce4fcb94 PB |
2358 | static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) |
2359 | { | |
2360 | TCGv_ptr ptr = tcg_temp_new_ptr(); | |
2361 | ||
ad75a51e | 2362 | tcg_gen_addi_ptr(ptr, tcg_env, offsetof(CPUX86State, xmm_regs)); |
ce4fcb94 PB |
2363 | gen_helper_memset(ptr, ptr, tcg_constant_i32(0), |
2364 | tcg_constant_ptr(CPU_NB_REGS * sizeof(ZMMReg))); | |
ce4fcb94 PB |
2365 | } |
2366 | ||
2367 | static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) | |
2368 | { | |
2369 | int i; | |
2370 | ||
2371 | for (i = 0; i < CPU_NB_REGS; i++) { | |
2372 | int offset = offsetof(CPUX86State, xmm_regs[i].ZMM_X(1)); | |
2373 | tcg_gen_gvec_dup_imm(MO_64, offset, 16, 16, 0); | |
2374 | } | |
2375 | } |