]> git.proxmox.com Git - mirror_qemu.git/blame - target/i386/tcg/decode-new.c.inc
target/i386: add X86_SPECIALs for MOVSX and MOVZX
[mirror_qemu.git] / target / i386 / tcg / decode-new.c.inc
CommitLineData
b3e22b23
PB
1/*
2 * New-style decoder for i386 instructions
3 *
4 * Copyright (c) 2022 Red Hat, Inc.
5 *
6 * Author: Paolo Bonzini <pbonzini@redhat.com>
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21
22/*
23 * The decoder is mostly based on tables copied from the Intel SDM. As
24 * a result, most operand load and writeback is done entirely in common
25 * table-driven code using the same operand type (X86_TYPE_*) and
e000687f
PB
26 * size (X86_SIZE_*) codes used in the manual. There are a few differences
27 * though.
28 *
8147df44
PB
29 * Operand sizes
30 * -------------
31 *
32 * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64
33 * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the
34 * "v" or "z" sizes. The decoder simply makes them separate operand sizes.
35 *
e000687f
PB
36 * Vector operands
37 * ---------------
b3e22b23
PB
38 *
39 * The main difference is that the V, U and W types are extended to
40 * cover MMX as well; if an instruction is like
41 *
42 * por Pq, Qq
43 * 66 por Vx, Hx, Wx
44 *
45 * only the second row is included and the instruction is marked as a
46 * valid MMX instruction. The MMX flag directs the decoder to rewrite
47 * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing
48 * "x" to "q" if there is no prefix.
49 *
50 * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x"
51 * if the difference is expressed via prefixes. Individual instructions
52 * are separated by prefix in the generator functions.
53 *
8147df44
PB
54 * There is a custom size "xh" used to address half of a SSE/AVX operand.
55 * This points to a 64-bit operand for SSE operations, 128-bit operand
56 * for 256-bit AVX operands, etc. It is used for conversion operations
57 * such as VCVTPH2PS or VCVTSS2SD.
58 *
b3e22b23
PB
59 * There are a couple cases in which instructions (e.g. MOVD) write the
60 * whole XMM or MM register but are established incorrectly in the manual
61 * as "d" or "q". These have to be fixed for the decoder to work correctly.
e000687f
PB
62 *
63 * VEX exception classes
64 * ---------------------
65 *
66 * Speaking about imprecisions in the manual, the decoder treats all
67 * exception-class 4 instructions as having an optional VEX prefix, and
68 * all exception-class 6 instructions as having a mandatory VEX prefix.
69 * This is true except for a dozen instructions; these are in exception
70 * class 4 but do not ignore the VEX.W bit (which does not even exist
71 * without a VEX prefix). These instructions are mostly listed in Intel's
72 * table 2-16, but with a few exceptions.
73 *
74 * The AMD manual has more precise subclasses for exceptions, and unlike Intel
75 * they list the VEX.W requirements in the exception classes as well (except
76 * when they don't). AMD describes class 6 as "AVX Mixed Memory Argument"
77 * without defining what a mixed memory argument is, but still use 4 as the
78 * primary exception class... except when they don't.
79 *
80 * The summary is:
81 * Intel AMD VEX.W note
82 * -------------------------------------------------------------------
83 * vpblendd 4 4J 0
84 * vpblendvb 4 4E-X 0 (*)
85 * vpbroadcastq 6 6D 0 (+)
86 * vpermd/vpermps 4 4H 0 (§)
87 * vpermq/vpermpd 4 4H-1 1 (§)
88 * vpermilpd/vpermilps 4 6E 0 (^)
89 * vpmaskmovd 6 4K significant (^)
90 * vpsllv 4 4K significant
91 * vpsrav 4 4J 0
92 * vpsrlv 4 4K significant
93 * vtestps/vtestpd 4 4G 0
94 *
95 * (*) AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may
96 * explain why it is considered exception class 4. However,
97 * Intel says that VEX-only instructions should be in class 6...
98 *
99 * (+) Not found in Intel's table 2-16
100 *
101 * (§) 4H and 4H-1 do not mention VEX.W requirements, which are
102 * however present in the description of the instruction
103 *
104 * (^) these are the two cases in which Intel and AMD disagree on the
105 * primary exception class
b3e22b23
PB
106 */
107
/*
 * Helpers that build X86OpEntry initializers for the opcode tables.
 *
 * The operand arguments come in (type, size) pairs; each is pasted onto
 * the X86_TYPE_ / X86_SIZE_ prefix with glue().  Any further arguments are
 * spliced verbatim at the end of the initializer, so they must themselves
 * be comma-terminated designated initializers (see the shorthand macros
 * defined after this group).
 */

/* An all-zero entry; note that the expansion already ends with a comma. */
#define X86_OP_NONE { 0 },

/*
 * Entry that defers to a secondary decoder: .decode is set to decode_<op>
 * and .is_decode is set, together with three operands.
 */
#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .decode = glue(decode_, op), \
    .op0 = glue(X86_TYPE_, op0_), \
    .s0 = glue(X86_SIZE_, s0_), \
    .op1 = glue(X86_TYPE_, op1_), \
    .s1 = glue(X86_SIZE_, s1_), \
    .op2 = glue(X86_TYPE_, op2_), \
    .s2 = glue(X86_SIZE_, s2_), \
    .is_decode = true, \
    ## __VA_ARGS__ \
}

/*
 * Two-operand group: op1 becomes type "2op" with the size of op0, and the
 * second argument pair is placed in op2.
 */
#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...) \
    X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* Group with no operands described in the table entry. */
#define X86_OP_GROUP0(op, ...) \
    X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)

/* Entry with a generator function: .gen is set to gen_<op>, plus three operands. */
#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \
    .gen = glue(gen_, op), \
    .op0 = glue(X86_TYPE_, op0_), \
    .s0 = glue(X86_SIZE_, s0_), \
    .op1 = glue(X86_TYPE_, op1_), \
    .s1 = glue(X86_SIZE_, s1_), \
    .op2 = glue(X86_TYPE_, op2_), \
    .s2 = glue(X86_SIZE_, s2_), \
    ## __VA_ARGS__ \
}

/* Like X86_OP_ENTRY3, with a fourth operand fixed to type I, size b. */
#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) \
    X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, \
        .op3 = X86_TYPE_I, .s3 = X86_SIZE_b, \
        ## __VA_ARGS__)

/*
 * Two-operand entry: op1 becomes type "2op" with the size of op0, and the
 * second argument pair is placed in op2.
 */
#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \
    X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
/* Single operand placed in op0; op1 and op2 are None. */
#define X86_OP_ENTRYw(op, op0, s0, ...) \
    X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
/* Single operand placed in op2; op0 and op1 are None. */
#define X86_OP_ENTRYr(op, op0, s0, ...) \
    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
/* Entry with no operands. */
#define X86_OP_ENTRY0(op, ...) \
    X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
151
/*
 * Shorthands for the trailing arguments of the X86_OP_* macros.  Each one
 * expands to one or more designated initializers followed by a comma, so
 * several can be written next to each other separated only by spaces.
 *
 * NOTE(review): several shorthands assign the same field (.special or
 * .vex_special).  If two of those are combined in one entry, C keeps only
 * the last initializer for that field.
 */

/* Sets .cpuid to X86_FEAT_<feat>. */
#define cpuid(feat) .cpuid = X86_FEAT_##feat,

/* Values for the .special field. */
#define xchg .special = X86_SPECIAL_Locked,
#define lock .special = X86_SPECIAL_HasLock,
#define mmx .special = X86_SPECIAL_MMX,
#define op0_Rd .special = X86_SPECIAL_Op0_Rd,
#define op2_Ry .special = X86_SPECIAL_Op2_Ry,
#define avx_movx .special = X86_SPECIAL_AVXExtMov,
#define sextT0 .special = X86_SPECIAL_SExtT0,
#define zextT0 .special = X86_SPECIAL_ZExtT0,

/*
 * VEX exception class (.vex_class), optionally combined with a
 * .vex_special modifier.
 */
#define vex1 .vex_class = 1,
#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar,
#define vex2 .vex_class = 2,
#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar,
#define vex3 .vex_class = 3,
#define vex4 .vex_class = 4,
#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned,
#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar,
#define vex5 .vex_class = 5,
#define vex6 .vex_class = 6,
#define vex7 .vex_class = 7,
#define vex8 .vex_class = 8,
#define vex11 .vex_class = 11,
#define vex12 .vex_class = 12,
#define vex13 .vex_class = 13,

/* Sets .check to X86_CHECK_<a> and .intercept to SVM_EXIT_<a> respectively. */
#define chk(a) .check = X86_CHECK_##a,
#define svm(a) .intercept = SVM_EXIT_##a,

/* Sets .vex_special to X86_VEX_AVX2_256. */
#define avx2_256 .vex_special = X86_VEX_AVX2_256,

/*
 * Bit masks for the .valid_prefix field: one bit for "no prefix" and one
 * bit derived from each of the PREFIX_DATA (66), PREFIX_REPZ (F3) and
 * PREFIX_REPNZ (F2) flags.
 */
#define P_00 1
#define P_66 (1 << PREFIX_DATA)
#define P_F3 (1 << PREFIX_REPZ)
#define P_F2 (1 << PREFIX_REPNZ)

/* Sets of prefixes stored in .valid_prefix; the name lists the members. */
#define p_00 .valid_prefix = P_00,
#define p_66 .valid_prefix = P_66,
#define p_f3 .valid_prefix = P_F3,
#define p_f2 .valid_prefix = P_F2,
#define p_00_66 .valid_prefix = P_00 | P_66,
#define p_00_f3 .valid_prefix = P_00 | P_F3,
#define p_66_f2 .valid_prefix = P_66 | P_F2,
#define p_00_66_f3 .valid_prefix = P_00 | P_66 | P_F3,
#define p_66_f3_f2 .valid_prefix = P_66 | P_F3 | P_F2,
#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2,
198
b3e22b23
PB
199static uint8_t get_modrm(DisasContext *s, CPUX86State *env)
200{
201 if (!s->has_modrm) {
202 s->modrm = x86_ldub_code(env, s);
203 s->has_modrm = true;
204 }
205 return s->modrm;
206}
207
92ec056a
PB
208static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4])
209{
210 if (s->prefix & PREFIX_REPNZ) {
211 return &entries[3];
212 } else if (s->prefix & PREFIX_REPZ) {
213 return &entries[2];
214 } else if (s->prefix & PREFIX_DATA) {
215 return &entries[1];
216 } else {
217 return &entries[0];
218 }
219}
220
57f6bba0
PB
221static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
222{
223 /* only includes ldmxcsr and stmxcsr, because they have AVX variants. */
224 static const X86OpEntry group15_reg[8] = {
225 };
226
227 static const X86OpEntry group15_mem[8] = {
183e6679
PB
228 [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128)),
229 [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128)),
57f6bba0
PB
230 };
231
232 uint8_t modrm = get_modrm(s, env);
233 if ((modrm >> 6) == 3) {
234 *entry = group15_reg[(modrm >> 3) & 7];
235 } else {
236 *entry = group15_mem[(modrm >> 3) & 7];
237 }
238}
239
1d0b9261
PB
240static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
241{
242 static const X86GenFunc group17_gen[8] = {
243 NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
244 };
245 int op = (get_modrm(s, env) >> 3) & 7;
246 entry->gen = group17_gen[op];
247}
248
ce4fcb94
PB
249static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
250{
251 static const X86OpEntry opcodes_group12[8] = {
252 {},
253 {},
254 X86_OP_ENTRY3(PSRLW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
255 {},
256 X86_OP_ENTRY3(PSRAW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
257 {},
258 X86_OP_ENTRY3(PSLLW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
259 {},
260 };
261
262 int op = (get_modrm(s, env) >> 3) & 7;
263 *entry = opcodes_group12[op];
264}
265
266static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
267{
268 static const X86OpEntry opcodes_group13[8] = {
269 {},
270 {},
271 X86_OP_ENTRY3(PSRLD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
272 {},
273 X86_OP_ENTRY3(PSRAD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
274 {},
275 X86_OP_ENTRY3(PSLLD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
276 {},
277 };
278
279 int op = (get_modrm(s, env) >> 3) & 7;
280 *entry = opcodes_group13[op];
281}
282
283static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
284{
285 static const X86OpEntry opcodes_group14[8] = {
286 /* grp14 */
287 {},
288 {},
289 X86_OP_ENTRY3(PSRLQ_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
290 X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
291 {},
292 {},
293 X86_OP_ENTRY3(PSLLQ_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66),
294 X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66),
295 };
296
297 int op = (get_modrm(s, env) >> 3) & 7;
298 *entry = opcodes_group14[op];
299}
300
92ec056a
PB
301static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
302{
303 static const X86OpEntry opcodes_0F6F[4] = {
cab529b0 304 X86_OP_ENTRY3(MOVDQ, P,q, None,None, Q,q, vex5 mmx), /* movq */
92ec056a
PB
305 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1), /* movdqa */
306 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* movdqu */
307 {},
308 };
309 *entry = *decode_by_prefix(s, opcodes_0F6F);
310}
311
ce4fcb94
PB
312static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
313{
314 static const X86OpEntry pshufw[4] = {
315 X86_OP_ENTRY3(PSHUFW, P,q, Q,q, I,b, vex4 mmx),
316 X86_OP_ENTRY3(PSHUFD, V,x, W,x, I,b, vex4 avx2_256),
317 X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256),
318 X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256),
319 };
320
321 *entry = *decode_by_prefix(s, pshufw);
322}
323
324static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
325{
326 if (!(s->prefix & PREFIX_VEX)) {
327 entry->gen = gen_EMMS;
328 } else if (!s->vex_l) {
329 entry->gen = gen_VZEROUPPER;
330 entry->vex_class = 8;
331 } else {
332 entry->gen = gen_VZEROALL;
333 entry->vex_class = 8;
334 }
335}
336
d1c1a422
PB
337static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
338{
339 static const X86OpEntry opcodes_0F78[4] = {
340 {},
afa94dab 341 X86_OP_ENTRY3(EXTRQ_i, V,x, None,None, I,w, cpuid(SSE4A)), /* AMD extension */
d1c1a422 342 {},
afa94dab 343 X86_OP_ENTRY3(INSERTQ_i, V,x, U,x, I,w, cpuid(SSE4A)), /* AMD extension */
d1c1a422
PB
344 };
345 *entry = *decode_by_prefix(s, opcodes_0F78);
346}
347
348static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
349{
350 if (s->prefix & PREFIX_REPNZ) {
afa94dab 351 entry->gen = gen_INSERTQ_r; /* AMD extension */
d1c1a422 352 } else if (s->prefix & PREFIX_DATA) {
afa94dab 353 entry->gen = gen_EXTRQ_r; /* AMD extension */
d1c1a422
PB
354 } else {
355 entry->gen = NULL;
356 };
357}
358
359static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
360{
361 static const X86OpEntry opcodes_0F7E[4] = {
362 X86_OP_ENTRY3(MOVD_from, E,y, None,None, P,y, vex5 mmx),
363 X86_OP_ENTRY3(MOVD_from, E,y, None,None, V,y, vex5),
364 X86_OP_ENTRY3(MOVQ, V,x, None,None, W,q, vex5), /* wrong dest Vy on SDM! */
365 {},
366 };
367 *entry = *decode_by_prefix(s, opcodes_0F7E);
368}
369
370static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
371{
372 static const X86OpEntry opcodes_0F7F[4] = {
cab529b0 373 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex5 mmx), /* movq */
d1c1a422
PB
374 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1), /* movdqa */
375 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4_unal), /* movdqu */
376 {},
377 };
378 *entry = *decode_by_prefix(s, opcodes_0F7F);
379}
380
6bbeb98d
PB
381static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
382{
383 static const X86OpEntry movq[4] = {
384 {},
385 X86_OP_ENTRY3(MOVQ, W,x, None, None, V,q, vex5),
386 X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q),
387 X86_OP_ENTRY3(MOVq_dq, P,q, None, None, U,q),
388 };
389
390 *entry = *decode_by_prefix(s, movq);
391}
392
b3e22b23 393static const X86OpEntry opcodes_0F38_00toEF[240] = {
16fc5726
PB
394 [0x00] = X86_OP_ENTRY3(PSHUFB, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
395 [0x01] = X86_OP_ENTRY3(PHADDW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
396 [0x02] = X86_OP_ENTRY3(PHADDD, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
397 [0x03] = X86_OP_ENTRY3(PHADDSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
398 [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
399 [0x05] = X86_OP_ENTRY3(PHSUBW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
400 [0x06] = X86_OP_ENTRY3(PHSUBD, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
401 [0x07] = X86_OP_ENTRY3(PHSUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
402
403 [0x10] = X86_OP_ENTRY2(PBLENDVB, V,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
e000687f 404 [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x, W,xh, vex11 chk(W0) cpuid(F16C) p_66),
16fc5726
PB
405 [0x14] = X86_OP_ENTRY2(BLENDVPS, V,x, W,x, vex4 cpuid(SSE41) p_66),
406 [0x15] = X86_OP_ENTRY2(BLENDVPD, V,x, W,x, vex4 cpuid(SSE41) p_66),
407 /* Listed incorrectly as type 4 */
e000687f 408 [0x16] = X86_OP_ENTRY3(VPERMD, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */
16fc5726
PB
409 [0x17] = X86_OP_ENTRY3(VPTEST, None,None, V,x, W,x, vex4 cpuid(SSE41) p_66),
410
411 /*
412 * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed
413 * as 128-bit only in 2-17.
414 */
415 [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
416 [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
417 [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x, None,None, W,w, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
418 [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
419 [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
420 [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
421
422 /* Same as PMOVSX. */
423 [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
424 [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
425 [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x, None,None, W,w, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
426 [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
427 [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
428 [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66),
e000687f 429 [0x36] = X86_OP_ENTRY3(VPERMD, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
16fc5726
PB
430 [0x37] = X86_OP_ENTRY3(PCMPGTQ, V,x, H,x, W,x, vex4 cpuid(SSE42) avx2_256 p_66),
431
432 [0x40] = X86_OP_ENTRY3(PMULLD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
433 [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66),
434 /* Listed incorrectly as type 4 */
435 [0x45] = X86_OP_ENTRY3(VPSRLV, V,x, H,x, W,x, vex6 cpuid(AVX2) p_66),
e000687f 436 [0x46] = X86_OP_ENTRY3(VPSRAV, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX2) p_66),
16fc5726
PB
437 [0x47] = X86_OP_ENTRY3(VPSLLV, V,x, H,x, W,x, vex6 cpuid(AVX2) p_66),
438
439 [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */
440 [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */
441 [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2) p_66), /* vgatherdps/d */
442 [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2) p_66), /* vgatherqps/d */
443
2872b0f3
PB
444 /* Should be exception type 2 but they do not have legacy SSE equivalents? */
445 [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
446 [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
447
448 [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
449 [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
450
451 [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
452 [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
453
16fc5726
PB
454 [0x08] = X86_OP_ENTRY3(PSIGNB, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
455 [0x09] = X86_OP_ENTRY3(PSIGNW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
456 [0x0a] = X86_OP_ENTRY3(PSIGND, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
457 [0x0b] = X86_OP_ENTRY3(PMULHRSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
e000687f
PB
458 /* Listed incorrectly as type 4 */
459 [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_00_66),
460 [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66),
461 [0x0e] = X86_OP_ENTRY3(VTESTPS, None,None, V,x, W,x, vex6 chk(W0) cpuid(AVX) p_66),
462 [0x0f] = X86_OP_ENTRY3(VTESTPD, None,None, V,x, W,x, vex6 chk(W0) cpuid(AVX) p_66),
463
464 [0x18] = X86_OP_ENTRY3(VPBROADCASTD, V,x, None,None, W,d, vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */
465 [0x19] = X86_OP_ENTRY3(VPBROADCASTQ, V,qq, None,None, W,q, vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */
466 [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66),
16fc5726
PB
467 [0x1c] = X86_OP_ENTRY3(PABSB, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
468 [0x1d] = X86_OP_ENTRY3(PABSW, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
469 [0x1e] = X86_OP_ENTRY3(PABSD, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
470
471 [0x28] = X86_OP_ENTRY3(PMULDQ, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
472 [0x29] = X86_OP_ENTRY3(PCMPEQQ, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
473 [0x2a] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */
474 [0x2b] = X86_OP_ENTRY3(VPACKUSDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
e000687f
PB
475 [0x2c] = X86_OP_ENTRY3(VMASKMOVPS, V,x, H,x, WM,x, vex6 chk(W0) cpuid(AVX) p_66),
476 [0x2d] = X86_OP_ENTRY3(VMASKMOVPD, V,x, H,x, WM,x, vex6 chk(W0) cpuid(AVX) p_66),
16fc5726 477 /* Incorrectly listed as Mx,Hx,Vx in the manual */
e000687f
PB
478 [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x, H,x, vex6 chk(W0) cpuid(AVX) p_66),
479 [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x, H,x, vex6 chk(W0) cpuid(AVX) p_66),
16fc5726
PB
480
481 [0x38] = X86_OP_ENTRY3(PMINSB, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
482 [0x39] = X86_OP_ENTRY3(PMINSD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
483 [0x3a] = X86_OP_ENTRY3(PMINUW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
484 [0x3b] = X86_OP_ENTRY3(PMINUD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
485 [0x3c] = X86_OP_ENTRY3(PMAXSB, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
486 [0x3d] = X86_OP_ENTRY3(PMAXSD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
487 [0x3e] = X86_OP_ENTRY3(PMAXUW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
488 [0x3f] = X86_OP_ENTRY3(PMAXUD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
489
e000687f
PB
490 /* VPBROADCASTQ not listed as W0 in table 2-16 */
491 [0x58] = X86_OP_ENTRY3(VPBROADCASTD, V,x, None,None, W,d, vex6 chk(W0) cpuid(AVX2) p_66),
492 [0x59] = X86_OP_ENTRY3(VPBROADCASTQ, V,x, None,None, W,q, vex6 chk(W0) cpuid(AVX2) p_66),
493 [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66),
16fc5726 494
e000687f
PB
495 [0x78] = X86_OP_ENTRY3(VPBROADCASTB, V,x, None,None, W,b, vex6 chk(W0) cpuid(AVX2) p_66),
496 [0x79] = X86_OP_ENTRY3(VPBROADCASTW, V,x, None,None, W,w, vex6 chk(W0) cpuid(AVX2) p_66),
16fc5726
PB
497
498 [0x8c] = X86_OP_ENTRY3(VPMASKMOV, V,x, H,x, WM,x, vex6 cpuid(AVX2) p_66),
499 [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x, V,x, H,x, vex6 cpuid(AVX2) p_66),
500
2872b0f3
PB
501 /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? */
502 [0x98] = X86_OP_ENTRY3(VFMADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
503 [0x99] = X86_OP_ENTRY3(VFMADD132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
504 [0x9a] = X86_OP_ENTRY3(VFMSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
505 [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
506 [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
507 [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
508 [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
509 [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
510
511 [0xa8] = X86_OP_ENTRY3(VFMADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
512 [0xa9] = X86_OP_ENTRY3(VFMADD213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
513 [0xaa] = X86_OP_ENTRY3(VFMSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
514 [0xab] = X86_OP_ENTRY3(VFMSUB213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
515 [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
516 [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
517 [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
518 [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
519
520 [0xb8] = X86_OP_ENTRY3(VFMADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
521 [0xb9] = X86_OP_ENTRY3(VFMADD231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
522 [0xba] = X86_OP_ENTRY3(VFMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
523 [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
524 [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
525 [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
526 [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
527 [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66),
528
e582b629
PB
529 [0xc8] = X86_OP_ENTRY2(SHA1NEXTE, V,dq, W,dq, cpuid(SHA_NI)),
530 [0xc9] = X86_OP_ENTRY2(SHA1MSG1, V,dq, W,dq, cpuid(SHA_NI)),
531 [0xca] = X86_OP_ENTRY2(SHA1MSG2, V,dq, W,dq, cpuid(SHA_NI)),
532 [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)),
533 [0xcc] = X86_OP_ENTRY2(SHA256MSG1, V,dq, W,dq, cpuid(SHA_NI)),
534 [0xcd] = X86_OP_ENTRY2(SHA256MSG2, V,dq, W,dq, cpuid(SHA_NI)),
535
16fc5726
PB
536 [0xdb] = X86_OP_ENTRY3(VAESIMC, V,dq, None,None, W,dq, vex4 cpuid(AES) p_66),
537 [0xdc] = X86_OP_ENTRY3(VAESENC, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
538 [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
539 [0xde] = X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
540 [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66),
b3e22b23
PB
541};
542
543/* five rows for no prefix, 66, F3, F2, 66+F2 */
544static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
1d0b9261
PB
545 [0] = {
546 X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
547 X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
548 {},
549 X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
550 X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
551 },
552 [1] = {
553 X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
554 X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
555 {},
556 X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
557 X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
558 },
559 [2] = {
560 X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
561 {},
562 {},
563 {},
564 {},
565 },
566 [3] = {
567 X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
568 {},
569 {},
570 {},
571 {},
572 },
573 [5] = {
574 X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
575 {},
8a36bbcf
PB
576 X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
577 X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)),
1d0b9261
PB
578 {},
579 },
580 [6] = {
581 {},
582 X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
583 X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
584 X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
585 {},
586 },
587 [7] = {
8a36bbcf 588 X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
1d0b9261 589 X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
8a36bbcf
PB
590 X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)),
591 X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)),
1d0b9261
PB
592 {},
593 },
b3e22b23
PB
594};
595
596static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
597{
598 *b = x86_ldub_code(env, s);
599 if (*b < 0xf0) {
600 *entry = opcodes_0F38_00toEF[*b];
601 } else {
602 int row = 0;
603 if (s->prefix & PREFIX_REPZ) {
604 /* The REPZ (F3) prefix has priority over 66 */
605 row = 2;
606 } else {
607 row += s->prefix & PREFIX_REPNZ ? 3 : 0;
608 row += s->prefix & PREFIX_DATA ? 1 : 0;
609 }
610 *entry = opcodes_0F38_F0toFF[*b & 15][row];
611 }
612}
613
79068477
PB
614static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
615{
616 static const X86OpEntry
617 vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66),
618 vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d, vex5 cpuid(SSE41) p_66);
619
620 int modrm = get_modrm(s, env);
621 *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem;
622}
623
b3e22b23 624static const X86OpEntry opcodes_0F3A[256] = {
79068477
PB
625 /*
626 * These are VEX-only, but incorrectly listed in the manual as exception type 4.
627 * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256
628 * only.
629 */
e000687f
PB
630 [0x00] = X86_OP_ENTRY3(VPERMQ, V,qq, W,qq, I,b, vex6 chk(W1) cpuid(AVX2) p_66),
631 [0x01] = X86_OP_ENTRY3(VPERMQ, V,qq, W,qq, I,b, vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */
632 [0x02] = X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */
633 [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x, W,x, I,b, vex6 chk(W0) cpuid(AVX) p_66),
634 [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x, W,x, I,b, vex6 chk(W0) cpuid(AVX) p_66),
635 [0x06] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
79068477 636
5baf5641
PB
637 [0x14] = X86_OP_ENTRY3(PEXTRB, E,b, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66),
638 [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66),
79068477
PB
639 [0x16] = X86_OP_ENTRY3(PEXTR, E,y, V,dq, I,b, vex5 cpuid(SSE41) p_66),
640 [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d, V,dq, I,b, vex5 cpuid(SSE41) p_66),
e000687f 641 [0x1d] = X86_OP_ENTRY3(VCVTPS2PH, W,xh, V,x, I,b, vex11 chk(W0) cpuid(F16C) p_66),
79068477 642
5baf5641 643 [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) op2_Ry p_66),
79068477
PB
644 [0x21] = X86_OP_GROUP0(VINSERTPS),
645 [0x22] = X86_OP_ENTRY4(PINSR, V,dq, H,dq, E,y, vex5 cpuid(SSE41) p_66),
646
647 [0x40] = X86_OP_ENTRY4(VDDPS, V,x, H,x, W,x, vex2 cpuid(SSE41) p_66),
648 [0x41] = X86_OP_ENTRY4(VDDPD, V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66),
649 [0x42] = X86_OP_ENTRY4(VMPSADBW, V,x, H,x, W,x, vex2 cpuid(SSE41) avx2_256 p_66),
650 [0x44] = X86_OP_ENTRY4(PCLMULQDQ, V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66),
e000687f 651 [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
79068477
PB
652
653 [0x60] = X86_OP_ENTRY4(PCMPESTRM, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
654 [0x61] = X86_OP_ENTRY4(PCMPESTRI, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
655 [0x62] = X86_OP_ENTRY4(PCMPISTRM, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
656 [0x63] = X86_OP_ENTRY4(PCMPISTRI, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66),
657
658 [0x08] = X86_OP_ENTRY3(VROUNDPS, V,x, W,x, I,b, vex2 cpuid(SSE41) p_66),
659 [0x09] = X86_OP_ENTRY3(VROUNDPD, V,x, W,x, I,b, vex2 cpuid(SSE41) p_66),
660 /*
661 * Not listed as four operand in the manual. Also writes and reads 128-bits
662 * from the first two operands due to the V operand picking higher entries of
663 * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect.
664 * For other unary operations such as VSQRTSx this is hidden by the "REPScalar"
665 * value of vex_special, because the table lists the operand types of VSQRTPx.
666 */
667 [0x0a] = X86_OP_ENTRY4(VROUNDSS, V,x, H,x, W,ss, vex3 cpuid(SSE41) p_66),
668 [0x0b] = X86_OP_ENTRY4(VROUNDSD, V,x, H,x, W,sd, vex3 cpuid(SSE41) p_66),
669 [0x0c] = X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex4 cpuid(SSE41) p_66),
670 [0x0d] = X86_OP_ENTRY4(VBLENDPD, V,x, H,x, W,x, vex4 cpuid(SSE41) p_66),
16fc5726
PB
671 [0x0e] = X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66),
672 [0x0f] = X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66),
79068477 673
e000687f
PB
674 [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66),
675 [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX) p_66),
79068477 676
e000687f
PB
677 [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66),
678 [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX2) p_66),
79068477
PB
679
680 /* Listed incorrectly as type 4 */
e000687f
PB
681 [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66),
682 [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66),
683 [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66 avx2_256),
79068477 684
e582b629
PB
685 [0xcc] = X86_OP_ENTRY3(SHA1RNDS4, V,dq, W,dq, I,b, cpuid(SHA_NI)),
686
79068477
PB
687 [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b, vex4 cpuid(AES) p_66),
688
1d0b9261 689 [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
b3e22b23
PB
690};
691
692static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
693{
694 *b = x86_ldub_code(env, s);
695 *entry = opcodes_0F3A[*b];
696}
697
7170a17e
PB
698/*
699 * There are some mistakes in the operands in the manual, and the load/store/register
700 * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and
701 * efficiency of implementation rather than copying what the manual says.
702 *
703 * In particular:
704 *
705 * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b,
706 * but this is not mentioned in the tables.
707 *
708 * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their
709 * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high
710 * quadword of the V operand.
711 */
712static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
713{
714 static const X86OpEntry opcodes_0F10_reg[4] = {
715 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS */
716 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD */
cab529b0
RZ
717 X86_OP_ENTRY3(VMOVSS, V,x, H,x, W,x, vex5),
718 X86_OP_ENTRY3(VMOVLPx, V,x, H,x, W,x, vex5), /* MOVSD */
7170a17e
PB
719 };
720
721 static const X86OpEntry opcodes_0F10_mem[4] = {
722 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS */
723 X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD */
cab529b0
RZ
724 X86_OP_ENTRY3(VMOVSS_ld, V,x, H,x, M,ss, vex5),
725 X86_OP_ENTRY3(VMOVSD_ld, V,x, H,x, M,sd, vex5),
7170a17e
PB
726 };
727
728 if ((get_modrm(s, env) >> 6) == 3) {
729 *entry = *decode_by_prefix(s, opcodes_0F10_reg);
730 } else {
731 *entry = *decode_by_prefix(s, opcodes_0F10_mem);
732 }
733}
734
735static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
736{
737 static const X86OpEntry opcodes_0F11_reg[4] = {
afa94dab
RZ
738 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPS */
739 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPD */
cab529b0
RZ
740 X86_OP_ENTRY3(VMOVSS, W,x, H,x, V,x, vex5),
741 X86_OP_ENTRY3(VMOVLPx, W,x, H,x, V,q, vex5), /* MOVSD */
7170a17e
PB
742 };
743
744 static const X86OpEntry opcodes_0F11_mem[4] = {
afa94dab
RZ
745 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPS */
746 X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPD */
cab529b0
RZ
747 X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex5),
748 X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */
7170a17e
PB
749 };
750
751 if ((get_modrm(s, env) >> 6) == 3) {
752 *entry = *decode_by_prefix(s, opcodes_0F11_reg);
753 } else {
754 *entry = *decode_by_prefix(s, opcodes_0F11_mem);
755 }
756}
757
758static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
759{
760 static const X86OpEntry opcodes_0F12_mem[4] = {
761 /*
762 * Use dq for operand for compatibility with gen_MOVSD and
763 * to allow VEX128 only.
764 */
cab529b0
RZ
765 X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex5), /* MOVLPS */
766 X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex5), /* MOVLPD */
7170a17e 767 X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)),
cab529b0 768 X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */
7170a17e
PB
769 };
770 static const X86OpEntry opcodes_0F12_reg[4] = {
cab529b0
RZ
771 X86_OP_ENTRY3(VMOVHLPS, V,dq, H,dq, U,dq, vex7),
772 X86_OP_ENTRY3(VMOVLPx, W,x, H,x, U,q, vex5), /* MOVLPD */
7170a17e 773 X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)),
cab529b0 774 X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, U,x, vex5 cpuid(SSE3)),
7170a17e
PB
775 };
776
777 if ((get_modrm(s, env) >> 6) == 3) {
778 *entry = *decode_by_prefix(s, opcodes_0F12_reg);
779 } else {
780 *entry = *decode_by_prefix(s, opcodes_0F12_mem);
781 if ((s->prefix & PREFIX_REPNZ) && s->vex_l) {
782 entry->s2 = X86_SIZE_qq;
783 }
784 }
785}
786
787static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
788{
789 static const X86OpEntry opcodes_0F16_mem[4] = {
790 /*
791 * Operand 1 technically only reads the low 64 bits, but uses dq so that
792 * it is easier to check for op0 == op1 in an endianness-neutral manner.
793 */
cab529b0
RZ
794 X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex5), /* MOVHPS */
795 X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex5), /* MOVHPD */
7170a17e
PB
796 X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)),
797 {},
798 };
799 static const X86OpEntry opcodes_0F16_reg[4] = {
800 /* Same as above, operand 1 could be Hq if it wasn't for big-endian. */
cab529b0
RZ
801 X86_OP_ENTRY3(VMOVLHPS, V,dq, H,dq, U,q, vex7),
802 X86_OP_ENTRY3(VMOVHPx, V,x, H,x, U,x, vex5), /* MOVHPD */
7170a17e
PB
803 X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)),
804 {},
805 };
806
807 if ((get_modrm(s, env) >> 6) == 3) {
808 *entry = *decode_by_prefix(s, opcodes_0F16_reg);
809 } else {
810 *entry = *decode_by_prefix(s, opcodes_0F16_mem);
811 }
812}
813
f8d19eec
PB
814static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
815{
816 static const X86OpEntry opcodes_0F2A[4] = {
817 X86_OP_ENTRY3(CVTPI2Px, V,x, None,None, Q,q),
818 X86_OP_ENTRY3(CVTPI2Px, V,x, None,None, Q,q),
819 X86_OP_ENTRY3(VCVTSI2Sx, V,x, H,x, E,y, vex3),
820 X86_OP_ENTRY3(VCVTSI2Sx, V,x, H,x, E,y, vex3),
821 };
822 *entry = *decode_by_prefix(s, opcodes_0F2A);
823}
824
825static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
826{
827 static const X86OpEntry opcodes_0F2B[4] = {
8bf171c2
RZ
828 X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex1), /* MOVNTPS */
829 X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex1), /* MOVNTPD */
830 /* AMD extensions */
f8d19eec
PB
831 X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */
832 X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */
833 };
834
835 *entry = *decode_by_prefix(s, opcodes_0F2B);
836}
837
838static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
839{
840 static const X86OpEntry opcodes_0F2C[4] = {
841 /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit. */
842 X86_OP_ENTRY3(CVTTPx2PI, P,q, None,None, W,q),
843 X86_OP_ENTRY3(CVTTPx2PI, P,q, None,None, W,dq),
844 X86_OP_ENTRY3(VCVTTSx2SI, G,y, None,None, W,ss, vex3),
845 X86_OP_ENTRY3(VCVTTSx2SI, G,y, None,None, W,sd, vex3),
846 };
847 *entry = *decode_by_prefix(s, opcodes_0F2C);
848}
849
850static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
851{
852 static const X86OpEntry opcodes_0F2D[4] = {
853 /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit. */
854 X86_OP_ENTRY3(CVTPx2PI, P,q, None,None, W,q),
855 X86_OP_ENTRY3(CVTPx2PI, P,q, None,None, W,dq),
856 X86_OP_ENTRY3(VCVTSx2SI, G,y, None,None, W,ss, vex3),
857 X86_OP_ENTRY3(VCVTSx2SI, G,y, None,None, W,sd, vex3),
858 };
859 *entry = *decode_by_prefix(s, opcodes_0F2D);
860}
861
2b55e479
PB
862static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
863{
864 /*
865 * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD
866 * respectively. Scalar values usually are associated with 0xF2 and 0xF3, for
867 * which X86_VEX_REPScalar exists, but here it has to be decoded by hand.
868 */
869 entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss);
870 entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI);
871}
872
03b45880
PB
873static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
874{
875 if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) {
876 entry->op1 = X86_TYPE_None;
877 entry->s1 = X86_SIZE_None;
878 }
879 switch (*b) {
880 case 0x51: entry->gen = gen_VSQRT; break;
881 case 0x52: entry->gen = gen_VRSQRT; break;
882 case 0x53: entry->gen = gen_VRCP; break;
03b45880
PB
883 }
884}
885
abd41884
PB
886static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
887{
888 static const X86OpEntry opcodes_0F5A[4] = {
889 X86_OP_ENTRY2(VCVTPS2PD, V,x, W,xh, vex2), /* VCVTPS2PD */
890 X86_OP_ENTRY2(VCVTPD2PS, V,x, W,x, vex2), /* VCVTPD2PS */
891 X86_OP_ENTRY3(VCVTSS2SD, V,x, H,x, W,x, vex2_rep3), /* VCVTSS2SD */
892 X86_OP_ENTRY3(VCVTSD2SS, V,x, H,x, W,x, vex2_rep3), /* VCVTSD2SS */
893 };
894 *entry = *decode_by_prefix(s, opcodes_0F5A);
895}
896
03b45880
PB
897static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
898{
899 static const X86OpEntry opcodes_0F5B[4] = {
900 X86_OP_ENTRY2(VCVTDQ2PS, V,x, W,x, vex2),
901 X86_OP_ENTRY2(VCVTPS2DQ, V,x, W,x, vex2),
902 X86_OP_ENTRY2(VCVTTPS2DQ, V,x, W,x, vex2),
903 {},
904 };
905 *entry = *decode_by_prefix(s, opcodes_0F5B);
906}
907
6bbeb98d
PB
908static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
909{
910 static const X86OpEntry opcodes_0FE6[4] = {
911 {},
912 X86_OP_ENTRY2(VCVTTPD2DQ, V,x, W,x, vex2),
cab529b0 913 X86_OP_ENTRY2(VCVTDQ2PD, V,x, W,x, vex5),
6bbeb98d
PB
914 X86_OP_ENTRY2(VCVTPD2DQ, V,x, W,x, vex2),
915 };
916 *entry = *decode_by_prefix(s, opcodes_0FE6);
917}
918
/*
 * Opcode map for the byte that follows a 0x0F escape, indexed by that byte
 * (see do_decode_0F).
 *
 * Entries use designated initializers, so their textual order is free; the
 * rows are grouped by half-row of the opcode map rather than sorted.  Slots
 * that are not listed stay zero-initialized.
 *
 * X86_OP_ENTRYn rows describe an instruction directly.  X86_OP_GROUPn rows
 * name a further decode step (presumably the decode_<name> function; the
 * macro is defined outside this chunk).
 */
static const X86OpEntry opcodes_0F[256] = {
    [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
    /*
     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
     * function.
     */
    [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),

    /* 0x10-0x17: SSE moves and unpacks. */
    [0x10] = X86_OP_GROUP0(0F10),
    [0x11] = X86_OP_GROUP0(0F11),
    [0x12] = X86_OP_GROUP0(0F12),
    [0x13] = X86_OP_ENTRY3(VMOVLPx_st,  M,q, None,None, V,q,  vex5 p_00_66),
    [0x14] = X86_OP_ENTRY3(VUNPCKLPx,   V,x, H,x, W,x,        vex4 p_00_66),
    [0x15] = X86_OP_ENTRY3(VUNPCKHPx,   V,x, H,x, W,x,        vex4 p_00_66),
    [0x16] = X86_OP_GROUP0(0F16),
    /* Incorrectly listed as Mq,Vq in the manual */
    [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),

    /* 0x50-0x57: mask extraction, unary math and bitwise logic. */
    [0x50] = X86_OP_ENTRY3(MOVMSK,     G,y, None,None, U,x, vex7 p_00_66),
    [0x51] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */
    [0x52] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */
    [0x53] = X86_OP_GROUP3(sse_unary,  V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */
    [0x54] = X86_OP_ENTRY3(PAND,       V,x, H,x, W,x,  vex4 p_00_66), /* vand */
    [0x55] = X86_OP_ENTRY3(PANDN,      V,x, H,x, W,x,  vex4 p_00_66), /* vandn */
    [0x56] = X86_OP_ENTRY3(POR,        V,x, H,x, W,x,  vex4 p_00_66), /* vor */
    [0x57] = X86_OP_ENTRY3(PXOR,       V,x, H,x, W,x,  vex4 p_00_66), /* vxor */

    /* 0x60-0x67: low-half unpack, pack and signed compare. */
    [0x60] = X86_OP_ENTRY3(PUNPCKLBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x61] = X86_OP_ENTRY3(PUNPCKLWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x62] = X86_OP_ENTRY3(PUNPCKLDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x63] = X86_OP_ENTRY3(PACKSSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x64] = X86_OP_ENTRY3(PCMPGTB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x65] = X86_OP_ENTRY3(PCMPGTW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x66] = X86_OP_ENTRY3(PCMPGTD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x67] = X86_OP_ENTRY3(PACKUSWB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),

    /* 0x70-0x77: shuffles, shift groups and equality compare. */
    [0x70] = X86_OP_GROUP0(0F70),
    [0x71] = X86_OP_GROUP0(group12),
    [0x72] = X86_OP_GROUP0(group13),
    [0x73] = X86_OP_GROUP0(group14),
    [0x74] = X86_OP_ENTRY3(PCMPEQB,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x75] = X86_OP_ENTRY3(PCMPEQW,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x76] = X86_OP_ENTRY3(PCMPEQD,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x77] = X86_OP_GROUP0(0F77),

    /* 0x28-0x2F: aligned moves, conversions and scalar compares. */
    [0x28] = X86_OP_ENTRY3(MOVDQ,      V,x,  None,None, W,x, vex1 p_00_66), /* MOVAPS */
    [0x29] = X86_OP_ENTRY3(MOVDQ,      W,x,  None,None, V,x, vex1 p_00_66), /* MOVAPS */
    [0x2A] = X86_OP_GROUP0(0F2A),
    [0x2B] = X86_OP_GROUP0(0F2B),
    [0x2C] = X86_OP_GROUP0(0F2C),
    [0x2D] = X86_OP_GROUP0(0F2D),
    [0x2E] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VUCOMISS/SD */
    [0x2F] = X86_OP_GROUP3(VxCOMISx,   None,None, V,x, W,x,  vex3 p_00_66), /* VCOMISS/SD */

    /* Escapes to the three-byte opcode maps. */
    [0x38] = X86_OP_GROUP0(0F38),
    [0x3a] = X86_OP_GROUP0(0F3A),

    /* 0x58-0x5F: floating-point arithmetic and conversions. */
    [0x58] = X86_OP_ENTRY3(VADD,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x59] = X86_OP_ENTRY3(VMUL,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5a] = X86_OP_GROUP0(0F5A),
    [0x5b] = X86_OP_GROUP0(0F5B),
    [0x5c] = X86_OP_ENTRY3(VSUB,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5d] = X86_OP_ENTRY3(VMIN,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5e] = X86_OP_ENTRY3(VDIV,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),
    [0x5f] = X86_OP_ENTRY3(VMAX,       V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2),

    /* 0x68-0x6F: high-half unpack, pack and moves into vector registers. */
    [0x68] = X86_OP_ENTRY3(PUNPCKHBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x69] = X86_OP_ENTRY3(PUNPCKHWD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6b] = X86_OP_ENTRY3(PACKSSDW,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
    [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x,  vex4 p_66 avx2_256),
    [0x6e] = X86_OP_ENTRY3(MOVD_to,    V,x, None,None, E,y, vex5 mmx p_00_66),  /* wrong dest Vy on SDM! */
    [0x6f] = X86_OP_GROUP0(0F6F),

    /* 0x78-0x7F: SSE4A, horizontal add/sub and moves out of vector registers. */
    [0x78] = X86_OP_GROUP0(0F78),
    [0x79] = X86_OP_GROUP2(0F79,       V,x, U,x,       cpuid(SSE4A)),
    [0x7c] = X86_OP_ENTRY3(VHADD,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
    [0x7d] = X86_OP_ENTRY3(VHSUB,      V,x, H,x, W,x,  vex2 cpuid(SSE3) p_66_f2),
    [0x7e] = X86_OP_GROUP0(0F7E),
    [0x7f] = X86_OP_GROUP0(0F7F),

    [0xae] = X86_OP_GROUP0(group15),

    /* 0xC2-0xC6: compare, word insert/extract and shuffle. */
    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),

    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xd6] = X86_OP_GROUP0(0FD6),
    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),

    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
    [0xe6] = X86_OP_GROUP0(0FE6),
    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */

    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),

    /* Incorrectly missing from 2-17 */
    [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xda] = X86_OP_ENTRY3(PMINUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xdb] = X86_OP_ENTRY3(PAND,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xdc] = X86_OP_ENTRY3(PADDUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xdd] = X86_OP_ENTRY3(PADDUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xde] = X86_OP_ENTRY3(PMAXUB,   V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xdf] = X86_OP_ENTRY3(PANDN,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),

    [0xe8] = X86_OP_ENTRY3(PSUBSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xe9] = X86_OP_ENTRY3(PSUBSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xea] = X86_OP_ENTRY3(PMINSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xeb] = X86_OP_ENTRY3(POR,     V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xec] = X86_OP_ENTRY3(PADDSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xed] = X86_OP_ENTRY3(PADDSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xee] = X86_OP_ENTRY3(PMAXSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xef] = X86_OP_ENTRY3(PXOR,    V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),

    [0xf8] = X86_OP_ENTRY3(PSUBB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xf9] = X86_OP_ENTRY3(PSUBW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfa] = X86_OP_ENTRY3(PSUBD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfb] = X86_OP_ENTRY3(PSUBQ,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfc] = X86_OP_ENTRY3(PADDB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfd] = X86_OP_ENTRY3(PADDW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    [0xfe] = X86_OP_ENTRY3(PADDD,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
    /* 0xff = UD0 */
};
1064
1065static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1066{
1067 *entry = opcodes_0F[*b];
1068}
1069
1070static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1071{
1072 *b = x86_ldub_code(env, s);
1073 do_decode_0F(s, env, entry, b);
1074}
1075
/*
 * Opcode map indexed by the first opcode byte (see decode_root).  Only the
 * 0x0F escape is populated; every other slot stays zero-initialized.
 */
static const X86OpEntry opcodes_root[256] = {
    [0x0F] = X86_OP_GROUP0(0F),
};
1079
1080#undef mmx
20581aad
PB
1081#undef vex1
1082#undef vex2
1083#undef vex3
1084#undef vex4
1085#undef vex4_unal
1086#undef vex5
1087#undef vex6
1088#undef vex7
1089#undef vex8
1090#undef vex11
1091#undef vex12
1092#undef vex13
b3e22b23
PB
1093
1094/*
1095 * Decode the fixed part of the opcode and place the last
1096 * in b.
1097 */
1098static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
1099{
1100 *entry = opcodes_root[*b];
1101}
1102
1103
1104static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
1105 X86DecodedOp *op, X86OpType type)
1106{
1107 int modrm = get_modrm(s, env);
1108 if ((modrm >> 6) == 3) {
b3e22b23
PB
1109 op->n = (modrm & 7);
1110 if (type != X86_TYPE_Q && type != X86_TYPE_N) {
1111 op->n |= REX_B(s);
1112 }
1113 } else {
1114 op->has_ea = true;
1115 op->n = -1;
1116 decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env));
1117 }
1118 return modrm;
1119}
1120
1121static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot)
1122{
1123 switch (size) {
1124 case X86_SIZE_b: /* byte */
1125 *ot = MO_8;
1126 return true;
1127
1128 case X86_SIZE_d: /* 32-bit */
1129 case X86_SIZE_ss: /* SSE/AVX scalar single precision */
1130 *ot = MO_32;
1131 return true;
1132
1133 case X86_SIZE_p: /* Far pointer, return offset size */
1134 case X86_SIZE_s: /* Descriptor, return offset size */
1135 case X86_SIZE_v: /* 16/32/64-bit, based on operand size */
1136 *ot = s->dflag;
1137 return true;
1138
1139 case X86_SIZE_pi: /* MMX */
1140 case X86_SIZE_q: /* 64-bit */
1141 case X86_SIZE_sd: /* SSE/AVX scalar double precision */
1142 *ot = MO_64;
1143 return true;
1144
1145 case X86_SIZE_w: /* 16-bit */
1146 *ot = MO_16;
1147 return true;
1148
1149 case X86_SIZE_y: /* 32/64-bit, based on operand size */
1150 *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
1151 return true;
1152
1153 case X86_SIZE_z: /* 16-bit for 16-bit operand size, else 32-bit */
1154 *ot = s->dflag == MO_16 ? MO_16 : MO_32;
1155 return true;
1156
1157 case X86_SIZE_dq: /* SSE/AVX 128-bit */
1158 if (e->special == X86_SPECIAL_MMX &&
1159 !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1160 *ot = MO_64;
1161 return true;
1162 }
1163 if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) {
1164 return false;
1165 }
1166 *ot = MO_128;
1167 return true;
1168
1169 case X86_SIZE_qq: /* AVX 256-bit */
1170 if (!s->vex_l) {
1171 return false;
1172 }
1173 *ot = MO_256;
1174 return true;
1175
1176 case X86_SIZE_x: /* 128/256-bit, based on operand size */
1177 if (e->special == X86_SPECIAL_MMX &&
1178 !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
1179 *ot = MO_64;
1180 return true;
1181 }
1182 /* fall through */
1183 case X86_SIZE_ps: /* SSE/AVX packed single precision */
1184 case X86_SIZE_pd: /* SSE/AVX packed double precision */
1185 *ot = s->vex_l ? MO_256 : MO_128;
1186 return true;
1187
a48b2697 1188 case X86_SIZE_xh: /* SSE/AVX packed half register */
cf5ec664
PB
1189 *ot = s->vex_l ? MO_128 : MO_64;
1190 return true;
1191
b3e22b23
PB
1192 case X86_SIZE_d64: /* Default to 64-bit in 64-bit mode */
1193 *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag;
1194 return true;
1195
1196 case X86_SIZE_f64: /* Ignore size override prefix in 64-bit mode */
1197 *ot = CODE64(s) ? MO_64 : s->dflag;
1198 return true;
1199
1200 default:
1201 *ot = -1;
1202 return true;
1203 }
1204}
1205
/*
 * Decode one operand of kind @type into @op.
 *
 * Depending on the type this sets op->unit (which register file or
 * immediate), op->n (register number, or -1 for memory), op->has_ea and
 * decode->mem (address components), and/or decode->immediate.  @b is the
 * last opcode byte; it is only consulted for X86_TYPE_LoBits.
 *
 * Returns false when the modrm byte does not match what the operand type
 * requires: a memory encoding for a register-only type (N, U, R) or a
 * register encoding for a memory-only type (M, WM).  Returns true otherwise.
 *
 * The switch shares code between cases through three labels: get_reg
 * (register from modrm.reg), get_modrm_reg (modrm must encode a register)
 * and get_modrm (register or memory from modrm.rm via decode_modrm).
 */
static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
                      X86DecodedOp *op, X86OpType type, int b)
{
    int modrm;

    switch (type) {
    case X86_TYPE_None:  /* Implicit or absent */
    case X86_TYPE_A:  /* Implicit */
    case X86_TYPE_F:  /* EFLAGS/RFLAGS */
        break;

    case X86_TYPE_B:  /* VEX.vvvv selects a GPR */
        op->unit = X86_OP_INT;
        op->n = s->vex_v;
        break;

    case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
        op->unit = X86_OP_CR;
        goto get_reg;

    case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
        op->unit = X86_OP_DR;
        goto get_reg;

    case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
        op->unit = X86_OP_INT;
        goto get_reg;

    case X86_TYPE_S:  /* reg selects a segment register */
        op->unit = X86_OP_SEG;
        goto get_reg;

    case X86_TYPE_P:  /* reg in the modrm byte selects an MMX register */
        op->unit = X86_OP_MMX;
        goto get_reg;

    case X86_TYPE_V:  /* reg in the modrm byte selects an XMM/YMM register */
        /* MMX-capable entry without 66/F3/F2: the register is an MMX one. */
        if (decode->e.special == X86_SPECIAL_MMX &&
            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
            op->unit = X86_OP_MMX;
        } else {
            op->unit = X86_OP_SSE;
        }
    get_reg:
        /* 3-bit modrm.reg field, extended by REX_R(s). */
        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
        break;

    case X86_TYPE_E:  /* ALU modrm operand */
        op->unit = X86_OP_INT;
        goto get_modrm;

    case X86_TYPE_Q:  /* MMX modrm operand */
        op->unit = X86_OP_MMX;
        goto get_modrm;

    case X86_TYPE_W:  /* XMM/YMM modrm operand */
        /* Same MMX/SSE selection as for X86_TYPE_V. */
        if (decode->e.special == X86_SPECIAL_MMX &&
            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
            op->unit = X86_OP_MMX;
        } else {
            op->unit = X86_OP_SSE;
        }
        goto get_modrm;

    case X86_TYPE_N:  /* R/M in the modrm byte selects an MMX register */
        op->unit = X86_OP_MMX;
        goto get_modrm_reg;

    case X86_TYPE_U:  /* R/M in the modrm byte selects an XMM/YMM register */
        /* Same MMX/SSE selection as for X86_TYPE_V. */
        if (decode->e.special == X86_SPECIAL_MMX &&
            !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) {
            op->unit = X86_OP_MMX;
        } else {
            op->unit = X86_OP_SSE;
        }
        goto get_modrm_reg;

    case X86_TYPE_R:  /* R/M in the modrm byte selects a register */
        op->unit = X86_OP_INT;
    get_modrm_reg:
        /* Register-only operand: reject memory encodings (mod != 3). */
        modrm = get_modrm(s, env);
        if ((modrm >> 6) != 3) {
            return false;
        }
        goto get_modrm;

    case X86_TYPE_WM:  /* modrm byte selects an XMM/YMM memory operand */
        op->unit = X86_OP_SSE;
        /* fall through */
    case X86_TYPE_M:  /* modrm byte selects a memory operand */
        /* Memory-only operand: reject register encodings (mod == 3). */
        modrm = get_modrm(s, env);
        if ((modrm >> 6) == 3) {
            return false;
        }
    get_modrm:
        decode_modrm(s, env, decode, op, type);
        break;

    case X86_TYPE_O:  /* Absolute address encoded in the instruction */
        op->unit = X86_OP_INT;
        op->has_ea = true;
        op->n = -1;
        /* No base or index; the displacement is read at address size. */
        decode->mem = (AddressParts) {
            .def_seg = R_DS,
            .base = -1,
            .index = -1,
            .disp = insn_get_addr(env, s, s->aflag)
        };
        break;

    case X86_TYPE_H:  /* For AVX, VEX.vvvv selects an XMM/YMM register */
        if ((s->prefix & PREFIX_VEX)) {
            op->unit = X86_OP_SSE;
            op->n = s->vex_v;
            break;
        }
        /*
         * Without VEX the H operand aliases another operand, so decode it
         * again using that operand's type.
         */
        if (op == &decode->op[0]) {
            /* shifts place the destination in VEX.vvvv, use modrm */
            return decode_op(s, env, decode, op, decode->e.op1, b);
        } else {
            return decode_op(s, env, decode, op, decode->e.op0, b);
        }

    case X86_TYPE_I:  /* Immediate */
        op->unit = X86_OP_IMM;
        decode->immediate = insn_get_signed(env, s, op->ot);
        break;

    case X86_TYPE_J:  /* Relative offset for a jump */
        op->unit = X86_OP_IMM;
        decode->immediate = insn_get_signed(env, s, op->ot);
        /* Turn the relative offset into a target, then wrap to the operand size. */
        decode->immediate += s->pc - s->cs_base;
        if (s->dflag == MO_16) {
            decode->immediate &= 0xffff;
        } else if (!CODE64(s)) {
            decode->immediate &= 0xffffffffu;
        }
        break;

    case X86_TYPE_L:  /* The upper 4 bits of the immediate select a 128-bit register */
        op->n = insn_get(env, s, op->ot) >> 4;
        break;

    case X86_TYPE_X:  /* string source */
        op->n = -1;
        decode->mem = (AddressParts) {
            .def_seg = R_DS,
            .base = R_ESI,
            .index = -1,
        };
        break;

    case X86_TYPE_Y:  /* string destination */
        op->n = -1;
        decode->mem = (AddressParts) {
            .def_seg = R_ES,
            .base = R_EDI,
            .index = -1,
        };
        break;

    case X86_TYPE_2op:  /* Same as operand 0: copy its decoded form */
        *op = decode->op[0];
        break;

    case X86_TYPE_LoBits:  /* Low 3 bits of the opcode byte select a GPR */
        op->n = (b & 7) | REX_B(s);
        op->unit = X86_OP_INT;
        break;

    case X86_TYPE_0 ... X86_TYPE_7:  /* Fixed GPR, numbered from X86_TYPE_0 */
        op->n = type - X86_TYPE_0;
        op->unit = X86_OP_INT;
        break;

    case X86_TYPE_ES ... X86_TYPE_GS:  /* Fixed segment register, numbered from X86_TYPE_ES */
        op->n = type - X86_TYPE_ES;
        op->unit = X86_OP_SEG;
        break;
    }

    return true;
}
1389
55a33286
PB
1390static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e)
1391{
1392 uint16_t sse_prefixes;
1393
1394 if (!e->valid_prefix) {
1395 return true;
1396 }
1397 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
1398 /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66. */
1399 s->prefix &= ~PREFIX_DATA;
1400 }
1401
1402 /* Now, either zero or one bit is set in sse_prefixes. */
1403 sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA);
1404 return e->valid_prefix & (1 << sse_prefixes);
1405}
1406
b3e22b23
PB
1407static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func,
1408 X86DecodedInsn *decode)
1409{
1410 X86OpEntry *e = &decode->e;
1411
1412 decode_func(s, env, e, &decode->b);
1413 while (e->is_decode) {
1414 e->is_decode = false;
1415 e->decode(s, env, e, &decode->b);
1416 }
1417
55a33286
PB
1418 if (!validate_sse_prefix(s, e)) {
1419 return false;
1420 }
1421
b3e22b23
PB
1422 /* First compute size of operands in order to initialize s->rip_offset. */
1423 if (e->op0 != X86_TYPE_None) {
1424 if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) {
1425 return false;
1426 }
1427 if (e->op0 == X86_TYPE_I) {
1428 s->rip_offset += 1 << decode->op[0].ot;
1429 }
1430 }
1431 if (e->op1 != X86_TYPE_None) {
1432 if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) {
1433 return false;
1434 }
1435 if (e->op1 == X86_TYPE_I) {
1436 s->rip_offset += 1 << decode->op[1].ot;
1437 }
1438 }
1439 if (e->op2 != X86_TYPE_None) {
1440 if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) {
1441 return false;
1442 }
1443 if (e->op2 == X86_TYPE_I) {
1444 s->rip_offset += 1 << decode->op[2].ot;
1445 }
1446 }
1447 if (e->op3 != X86_TYPE_None) {
79068477
PB
1448 /*
1449 * A couple instructions actually use the extra immediate byte for an Lx
1450 * register operand; those are handled in the gen_* functions as one off.
1451 */
b3e22b23
PB
1452 assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b);
1453 s->rip_offset += 1;
1454 }
1455
1456 if (e->op0 != X86_TYPE_None &&
1457 !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) {
1458 return false;
1459 }
1460
1461 if (e->op1 != X86_TYPE_None &&
1462 !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) {
1463 return false;
1464 }
1465
1466 if (e->op2 != X86_TYPE_None &&
1467 !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) {
1468 return false;
1469 }
1470
1471 if (e->op3 != X86_TYPE_None) {
1472 decode->immediate = insn_get_signed(env, s, MO_8);
1473 }
1474
1475 return true;
1476}
1477
caa01fad
PB
1478static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid)
1479{
1480 switch (cpuid) {
1481 case X86_FEAT_None:
1482 return true;
cf5ec664
PB
1483 case X86_FEAT_F16C:
1484 return (s->cpuid_ext_features & CPUID_EXT_F16C);
2872b0f3
PB
1485 case X86_FEAT_FMA:
1486 return (s->cpuid_ext_features & CPUID_EXT_FMA);
caa01fad
PB
1487 case X86_FEAT_MOVBE:
1488 return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
1489 case X86_FEAT_PCLMULQDQ:
1490 return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
1491 case X86_FEAT_SSE:
1492 return (s->cpuid_ext_features & CPUID_SSE);
1493 case X86_FEAT_SSE2:
1494 return (s->cpuid_ext_features & CPUID_SSE2);
1495 case X86_FEAT_SSE3:
1496 return (s->cpuid_ext_features & CPUID_EXT_SSE3);
1497 case X86_FEAT_SSSE3:
1498 return (s->cpuid_ext_features & CPUID_EXT_SSSE3);
1499 case X86_FEAT_SSE41:
1500 return (s->cpuid_ext_features & CPUID_EXT_SSE41);
1501 case X86_FEAT_SSE42:
1502 return (s->cpuid_ext_features & CPUID_EXT_SSE42);
1503 case X86_FEAT_AES:
1504 if (!(s->cpuid_ext_features & CPUID_EXT_AES)) {
1505 return false;
1506 } else if (!(s->prefix & PREFIX_VEX)) {
1507 return true;
1508 } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) {
1509 return false;
1510 } else {
1511 return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES);
1512 }
1513
1514 case X86_FEAT_AVX:
1515 return (s->cpuid_ext_features & CPUID_EXT_AVX);
1516
71a0891d
PB
1517 case X86_FEAT_3DNOW:
1518 return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
caa01fad
PB
1519 case X86_FEAT_SSE4A:
1520 return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A);
1521
1522 case X86_FEAT_ADX:
1523 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX);
1524 case X86_FEAT_BMI1:
1525 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1);
1526 case X86_FEAT_BMI2:
1527 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
1528 case X86_FEAT_AVX2:
1529 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
e582b629
PB
1530 case X86_FEAT_SHA_NI:
1531 return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
caa01fad
PB
1532 }
1533 g_assert_not_reached();
1534}
1535
20581aad
PB
1536static bool validate_vex(DisasContext *s, X86DecodedInsn *decode)
1537{
1538 X86OpEntry *e = &decode->e;
1539
1540 switch (e->vex_special) {
1541 case X86_VEX_REPScalar:
1542 /*
1543 * Instructions which differ between 00/66 and F2/F3 in the
1544 * exception classification and the size of the memory operand.
1545 */
3d304620 1546 assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4);
20581aad 1547 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
3d304620 1548 e->vex_class = e->vex_class < 4 ? 3 : 5;
20581aad
PB
1549 if (s->vex_l) {
1550 goto illegal;
1551 }
1552 assert(decode->e.s2 == X86_SIZE_x);
1553 if (decode->op[2].has_ea) {
1554 decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64;
1555 }
1556 }
1557 break;
1558
1559 case X86_VEX_SSEUnaligned:
1560 /* handled in sse_needs_alignment. */
1561 break;
1562
1563 case X86_VEX_AVX2_256:
1564 if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) {
1565 goto illegal;
1566 }
1567 }
1568
20581aad
PB
1569 switch (e->vex_class) {
1570 case 0:
1571 if (s->prefix & PREFIX_VEX) {
1572 goto illegal;
1573 }
1574 return true;
1575 case 1:
1576 case 2:
1577 case 3:
1578 case 4:
1579 case 5:
1580 case 7:
1581 if (s->prefix & PREFIX_VEX) {
1582 if (!(s->flags & HF_AVX_EN_MASK)) {
1583 goto illegal;
1584 }
38e65936
PB
1585 } else if (e->special != X86_SPECIAL_MMX ||
1586 (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
20581aad
PB
1587 if (!(s->flags & HF_OSFXSR_MASK)) {
1588 goto illegal;
1589 }
1590 }
1591 break;
1592 case 12:
1593 /* Must have a VSIB byte and no address prefix. */
1594 assert(s->has_modrm);
1595 if ((s->modrm & 7) != 4 || s->aflag == MO_16) {
1596 goto illegal;
1597 }
1598
1599 /* Check no overlap between registers. */
1600 if (!decode->op[0].has_ea &&
1601 (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) {
1602 goto illegal;
1603 }
1604 assert(!decode->op[1].has_ea);
1605 if (decode->op[1].n == decode->mem.index) {
1606 goto illegal;
1607 }
1608 if (!decode->op[2].has_ea &&
1609 (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) {
1610 goto illegal;
1611 }
1612 /* fall through */
1613 case 6:
1614 case 11:
1615 if (!(s->prefix & PREFIX_VEX)) {
1616 goto illegal;
1617 }
1618 if (!(s->flags & HF_AVX_EN_MASK)) {
1619 goto illegal;
1620 }
1621 break;
1622 case 8:
ce4fcb94
PB
1623 /* Non-VEX case handled in decode_0F77. */
1624 assert(s->prefix & PREFIX_VEX);
20581aad
PB
1625 if (!(s->flags & HF_AVX_EN_MASK)) {
1626 goto illegal;
1627 }
1628 break;
1629 case 13:
1630 if (!(s->prefix & PREFIX_VEX)) {
1631 goto illegal;
1632 }
1633 if (s->vex_l) {
1634 goto illegal;
1635 }
1636 /* All integer instructions use VEX.vvvv, so exit. */
1637 return true;
1638 }
1639
1640 if (s->vex_v != 0 &&
1641 e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B &&
1642 e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B &&
1643 e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) {
1644 goto illegal;
1645 }
1646
1647 if (s->flags & HF_TS_MASK) {
1648 goto nm_exception;
1649 }
1650 if (s->flags & HF_EM_MASK) {
1651 goto illegal;
1652 }
183e6679 1653
e000687f
PB
1654 if (e->check) {
1655 if (e->check & X86_CHECK_VEX128) {
1656 if (s->vex_l) {
1657 goto illegal;
1658 }
1659 }
1660 if (e->check & X86_CHECK_W0) {
1661 if (s->vex_w) {
1662 goto illegal;
1663 }
1664 }
1665 if (e->check & X86_CHECK_W1) {
1666 if (!s->vex_w) {
1667 goto illegal;
1668 }
183e6679
PB
1669 }
1670 }
20581aad
PB
1671 return true;
1672
1673nm_exception:
1674 gen_NM_exception(s);
1675 return false;
1676illegal:
1677 gen_illegal_opcode(s);
1678 return false;
1679}
1680
b3e22b23
PB
1681/*
1682 * Convert one instruction. s->base.is_jmp is set if the translation must
1683 * be stopped.
1684 */
1685static void disas_insn_new(DisasContext *s, CPUState *cpu, int b)
1686{
b77af26e 1687 CPUX86State *env = cpu_env(cpu);
b3e22b23
PB
1688 bool first = true;
1689 X86DecodedInsn decode;
1690 X86DecodeFunc decode_func = decode_root;
1691
b3e22b23
PB
1692 s->has_modrm = false;
1693
1694 next_byte:
1695 if (first) {
1696 first = false;
1697 } else {
1698 b = x86_ldub_code(env, s);
1699 }
1700 /* Collect prefixes. */
1701 switch (b) {
1702 case 0xf3:
1703 s->prefix |= PREFIX_REPZ;
1704 s->prefix &= ~PREFIX_REPNZ;
1705 goto next_byte;
1706 case 0xf2:
1707 s->prefix |= PREFIX_REPNZ;
1708 s->prefix &= ~PREFIX_REPZ;
1709 goto next_byte;
1710 case 0xf0:
1711 s->prefix |= PREFIX_LOCK;
1712 goto next_byte;
1713 case 0x2e:
1714 s->override = R_CS;
1715 goto next_byte;
1716 case 0x36:
1717 s->override = R_SS;
1718 goto next_byte;
1719 case 0x3e:
1720 s->override = R_DS;
1721 goto next_byte;
1722 case 0x26:
1723 s->override = R_ES;
1724 goto next_byte;
1725 case 0x64:
1726 s->override = R_FS;
1727 goto next_byte;
1728 case 0x65:
1729 s->override = R_GS;
1730 goto next_byte;
1731 case 0x66:
1732 s->prefix |= PREFIX_DATA;
1733 goto next_byte;
1734 case 0x67:
1735 s->prefix |= PREFIX_ADR;
1736 goto next_byte;
1737#ifdef TARGET_X86_64
1738 case 0x40 ... 0x4f:
1739 if (CODE64(s)) {
1740 /* REX prefix */
1741 s->prefix |= PREFIX_REX;
1742 s->vex_w = (b >> 3) & 1;
1743 s->rex_r = (b & 0x4) << 1;
1744 s->rex_x = (b & 0x2) << 2;
1745 s->rex_b = (b & 0x1) << 3;
1746 goto next_byte;
1747 }
1748 break;
1749#endif
1750 case 0xc5: /* 2-byte VEX */
1751 case 0xc4: /* 3-byte VEX */
1752 /*
1753 * VEX prefixes cannot be used except in 32-bit mode.
1754 * Otherwise the instruction is LES or LDS.
1755 */
1756 if (CODE32(s) && !VM86(s)) {
1757 static const int pp_prefix[4] = {
1758 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
1759 };
1760 int vex3, vex2 = x86_ldub_code(env, s);
1761
1762 if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
1763 /*
1764 * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
1765 * otherwise the instruction is LES or LDS.
1766 */
1767 s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
1768 break;
1769 }
1770
1771 /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
1772 if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ
1773 | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
1774 goto illegal_op;
1775 }
1776#ifdef TARGET_X86_64
1777 s->rex_r = (~vex2 >> 4) & 8;
1778#endif
1779 if (b == 0xc5) {
1780 /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */
1781 vex3 = vex2;
1782 decode_func = decode_0F;
1783 } else {
1784 /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
1785 vex3 = x86_ldub_code(env, s);
1786#ifdef TARGET_X86_64
1787 s->rex_x = (~vex2 >> 3) & 8;
1788 s->rex_b = (~vex2 >> 2) & 8;
1789#endif
1790 s->vex_w = (vex3 >> 7) & 1;
1791 switch (vex2 & 0x1f) {
1792 case 0x01: /* Implied 0f leading opcode bytes. */
1793 decode_func = decode_0F;
1794 break;
1795 case 0x02: /* Implied 0f 38 leading opcode bytes. */
1796 decode_func = decode_0F38;
1797 break;
1798 case 0x03: /* Implied 0f 3a leading opcode bytes. */
1799 decode_func = decode_0F3A;
1800 break;
1801 default: /* Reserved for future use. */
1802 goto unknown_op;
1803 }
1804 }
1805 s->vex_v = (~vex3 >> 3) & 0xf;
1806 s->vex_l = (vex3 >> 2) & 1;
1807 s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX;
1808 }
1809 break;
1810 default:
1811 if (b >= 0x100) {
1812 b -= 0x100;
1813 decode_func = do_decode_0F;
1814 }
1815 break;
1816 }
1817
1818 /* Post-process prefixes. */
1819 if (CODE64(s)) {
1820 /*
1821 * In 64-bit mode, the default data size is 32-bit. Select 64-bit
1822 * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
1823 * over 0x66 if both are present.
1824 */
1825 s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32);
1826 /* In 64-bit mode, 0x67 selects 32-bit addressing. */
1827 s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64);
1828 } else {
1829 /* In 16/32-bit mode, 0x66 selects the opposite data size. */
1830 if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) {
1831 s->dflag = MO_32;
1832 } else {
1833 s->dflag = MO_16;
1834 }
1835 /* In 16/32-bit mode, 0x67 selects the opposite addressing. */
1836 if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) {
1837 s->aflag = MO_32;
1838 } else {
1839 s->aflag = MO_16;
1840 }
1841 }
1842
1843 memset(&decode, 0, sizeof(decode));
1844 decode.b = b;
1845 if (!decode_insn(s, env, decode_func, &decode)) {
1846 goto illegal_op;
1847 }
1848 if (!decode.e.gen) {
1849 goto unknown_op;
1850 }
1851
caa01fad
PB
1852 if (!has_cpuid_feature(s, decode.e.cpuid)) {
1853 goto illegal_op;
1854 }
1855
183e6679
PB
1856 /* Checks that result in #UD come first. */
1857 if (decode.e.check) {
1858 if (decode.e.check & X86_CHECK_i64) {
1859 if (CODE64(s)) {
1860 goto illegal_op;
1861 }
1862 }
1863 if (decode.e.check & X86_CHECK_o64) {
1864 if (!CODE64(s)) {
1865 goto illegal_op;
1866 }
1867 }
1868 if (decode.e.check & X86_CHECK_prot) {
1869 if (!PE(s) || VM86(s)) {
1870 goto illegal_op;
1871 }
1872 }
1873 }
1874
b3e22b23
PB
1875 switch (decode.e.special) {
1876 case X86_SPECIAL_None:
1877 break;
1878
1879 case X86_SPECIAL_Locked:
1880 if (decode.op[0].has_ea) {
1881 s->prefix |= PREFIX_LOCK;
1882 }
b609db94
PB
1883 decode.e.special = X86_SPECIAL_HasLock;
1884 /* fallthrough */
1885 case X86_SPECIAL_HasLock:
b3e22b23
PB
1886 break;
1887
5baf5641 1888 case X86_SPECIAL_Op0_Rd:
b3e22b23
PB
1889 assert(decode.op[0].unit == X86_OP_INT);
1890 if (!decode.op[0].has_ea) {
1891 decode.op[0].ot = MO_32;
1892 }
1893 break;
1894
5baf5641 1895 case X86_SPECIAL_Op2_Ry:
b3e22b23
PB
1896 assert(decode.op[2].unit == X86_OP_INT);
1897 if (!decode.op[2].has_ea) {
5baf5641 1898 decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag;
b3e22b23
PB
1899 }
1900 break;
1901
16fc5726
PB
1902 case X86_SPECIAL_AVXExtMov:
1903 if (!decode.op[2].has_ea) {
1904 decode.op[2].ot = s->vex_l ? MO_256 : MO_128;
1905 } else if (s->vex_l) {
1906 decode.op[2].ot++;
1907 }
1908 break;
1909
8a36bbcf
PB
1910 case X86_SPECIAL_SExtT0:
1911 case X86_SPECIAL_ZExtT0:
1912 /* Handled in gen_load. */
1913 assert(decode.op[1].unit == X86_OP_INT);
1914 break;
1915
b2ea6450 1916 default:
b3e22b23
PB
1917 break;
1918 }
1919
b609db94
PB
1920 if (s->prefix & PREFIX_LOCK) {
1921 if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) {
1922 goto illegal_op;
1923 }
1924 }
1925
20581aad
PB
1926 if (!validate_vex(s, &decode)) {
1927 return;
1928 }
183e6679
PB
1929
1930 /*
1931 * Checks that result in #GP or VMEXIT come second. Intercepts are
1932 * generally checked after non-memory exceptions (i.e. before all
1933 * exceptions if there is no memory operand). Exceptions are
1934 * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
1935 *
1936 * RSM and XSETBV will be handled in the gen_* functions
1937 * instead of using chk().
1938 */
1939 if (decode.e.check & X86_CHECK_cpl0) {
1940 if (CPL(s) != 0) {
1941 goto gp_fault;
1942 }
1943 }
1944 if (decode.e.intercept && unlikely(GUEST(s))) {
1945 gen_helper_svm_check_intercept(tcg_env,
1946 tcg_constant_i32(decode.e.intercept));
1947 }
1948 if (decode.e.check) {
1949 if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
1950 if (IOPL(s) < 3) {
1951 goto gp_fault;
1952 }
1953 } else if (decode.e.check & X86_CHECK_cpl_iopl) {
1954 if (IOPL(s) < CPL(s)) {
1955 goto gp_fault;
1956 }
1957 }
1958 }
1959
b2ea6450
MB
1960 if (decode.e.special == X86_SPECIAL_MMX &&
1961 !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) {
ad75a51e 1962 gen_helper_enter_mmx(tcg_env);
b2ea6450
MB
1963 }
1964
b3e22b23 1965 if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
20581aad 1966 gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
b3e22b23 1967 }
6ba13999 1968 if (s->prefix & PREFIX_LOCK) {
6ba13999
PB
1969 gen_load(s, &decode, 2, s->T1);
1970 decode.e.gen(s, env, &decode);
1971 } else {
1972 if (decode.op[0].unit == X86_OP_MMX) {
1973 compute_mmx_offset(&decode.op[0]);
1974 } else if (decode.op[0].unit == X86_OP_SSE) {
1975 compute_xmm_offset(&decode.op[0]);
1976 }
1977 gen_load(s, &decode, 1, s->T0);
1978 gen_load(s, &decode, 2, s->T1);
1979 decode.e.gen(s, env, &decode);
1980 gen_writeback(s, &decode, 0, s->T0);
1981 }
b3e22b23 1982 return;
183e6679
PB
1983 gp_fault:
1984 gen_exception_gpf(s);
1985 return;
b3e22b23
PB
1986 illegal_op:
1987 gen_illegal_opcode(s);
1988 return;
1989 unknown_op:
1990 gen_unknown_opcode(env, s);
1991}