]>
Commit | Line | Data |
---|---|---|
eb13296c MH |
1 | /* |
2 | * x86 instruction analysis | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | * | |
18 | * Copyright (C) IBM Corporation, 2002, 2004, 2009 | |
19 | */ | |
20 | ||
21 | #include <linux/string.h> | |
22 | #include <asm/inat.h> | |
23 | #include <asm/insn.h> | |
24 | ||
53a019a9 MH |
/*
 * Bounds-checked byte fetch helpers.
 *
 * get_next(), peek_nbyte_next() and peek_next() jump to a label named
 * err_out, which the calling function must provide, when the requested
 * bytes would extend more than MAX_INSN_SIZE bytes past insn->kaddr.
 * Every macro argument is parenthesized so that arbitrary expressions
 * (e.g. "&local_insn" or "1 << 2") can be passed safely.
 */

/* Verify that sizeof(t) bytes, starting n bytes ahead, fit in the same instruction */
#define validate_next(t, insn, n)	\
	((insn)->next_byte + sizeof(t) + (n) - (insn)->kaddr <= MAX_INSN_SIZE)

/* Unchecked: read a value of type t at next_byte and advance past it */
#define __get_next(t, insn)	\
	({ t r = *(t*)(insn)->next_byte; (insn)->next_byte += sizeof(t); r; })

/* Unchecked: read a value of type t located n bytes ahead, without advancing */
#define __peek_nbyte_next(t, insn, n)	\
	({ t r = *(t*)((insn)->next_byte + (n)); r; })

/* Checked read-and-advance; jumps to err_out if the read would be out of range */
#define get_next(t, insn)	\
	({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })

/* Checked look-ahead by n bytes; jumps to err_out if the read would be out of range */
#define peek_nbyte_next(t, insn, n)	\
	({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })

/* Checked look at the byte(s) at the current position */
#define peek_next(t, insn)	peek_nbyte_next(t, insn, 0)
e0e492e9 | 42 | |
eb13296c MH |
43 | /** |
44 | * insn_init() - initialize struct insn | |
45 | * @insn: &struct insn to be initialized | |
46 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) | |
47 | * @x86_64: !0 for 64-bit kernel or 64-bit app | |
48 | */ | |
49 | void insn_init(struct insn *insn, const void *kaddr, int x86_64) | |
50 | { | |
51 | memset(insn, 0, sizeof(*insn)); | |
52 | insn->kaddr = kaddr; | |
53 | insn->next_byte = kaddr; | |
54 | insn->x86_64 = x86_64 ? 1 : 0; | |
55 | insn->opnd_bytes = 4; | |
56 | if (x86_64) | |
57 | insn->addr_bytes = 8; | |
58 | else | |
59 | insn->addr_bytes = 4; | |
60 | } | |
61 | ||
62 | /** | |
63 | * insn_get_prefixes - scan x86 instruction prefix bytes | |
64 | * @insn: &struct insn containing instruction | |
65 | * | |
66 | * Populates the @insn->prefixes bitmap, and updates @insn->next_byte | |
67 | * to point to the (first) opcode. No effect if @insn->prefixes.got | |
68 | * is already set. | |
69 | */ | |
70 | void insn_get_prefixes(struct insn *insn) | |
71 | { | |
72 | struct insn_field *prefixes = &insn->prefixes; | |
73 | insn_attr_t attr; | |
74 | insn_byte_t b, lb; | |
75 | int i, nb; | |
76 | ||
77 | if (prefixes->got) | |
78 | return; | |
79 | ||
80 | nb = 0; | |
81 | lb = 0; | |
82 | b = peek_next(insn_byte_t, insn); | |
83 | attr = inat_get_opcode_attribute(b); | |
04d46c1b | 84 | while (inat_is_legacy_prefix(attr)) { |
eb13296c MH |
85 | /* Skip if same prefix */ |
86 | for (i = 0; i < nb; i++) | |
87 | if (prefixes->bytes[i] == b) | |
88 | goto found; | |
89 | if (nb == 4) | |
90 | /* Invalid instruction */ | |
91 | break; | |
92 | prefixes->bytes[nb++] = b; | |
93 | if (inat_is_address_size_prefix(attr)) { | |
94 | /* address size switches 2/4 or 4/8 */ | |
95 | if (insn->x86_64) | |
96 | insn->addr_bytes ^= 12; | |
97 | else | |
98 | insn->addr_bytes ^= 6; | |
99 | } else if (inat_is_operand_size_prefix(attr)) { | |
100 | /* oprand size switches 2/4 */ | |
101 | insn->opnd_bytes ^= 6; | |
102 | } | |
103 | found: | |
104 | prefixes->nbytes++; | |
105 | insn->next_byte++; | |
106 | lb = b; | |
107 | b = peek_next(insn_byte_t, insn); | |
108 | attr = inat_get_opcode_attribute(b); | |
109 | } | |
110 | /* Set the last prefix */ | |
111 | if (lb && lb != insn->prefixes.bytes[3]) { | |
112 | if (unlikely(insn->prefixes.bytes[3])) { | |
113 | /* Swap the last prefix */ | |
114 | b = insn->prefixes.bytes[3]; | |
115 | for (i = 0; i < nb; i++) | |
116 | if (prefixes->bytes[i] == lb) | |
117 | prefixes->bytes[i] = b; | |
118 | } | |
119 | insn->prefixes.bytes[3] = lb; | |
120 | } | |
121 | ||
e0e492e9 | 122 | /* Decode REX prefix */ |
eb13296c MH |
123 | if (insn->x86_64) { |
124 | b = peek_next(insn_byte_t, insn); | |
125 | attr = inat_get_opcode_attribute(b); | |
126 | if (inat_is_rex_prefix(attr)) { | |
127 | insn->rex_prefix.value = b; | |
128 | insn->rex_prefix.nbytes = 1; | |
129 | insn->next_byte++; | |
130 | if (X86_REX_W(b)) | |
131 | /* REX.W overrides opnd_size */ | |
132 | insn->opnd_bytes = 8; | |
133 | } | |
134 | } | |
135 | insn->rex_prefix.got = 1; | |
e0e492e9 MH |
136 | |
137 | /* Decode VEX prefix */ | |
138 | b = peek_next(insn_byte_t, insn); | |
139 | attr = inat_get_opcode_attribute(b); | |
140 | if (inat_is_vex_prefix(attr)) { | |
141 | insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); | |
142 | if (!insn->x86_64) { | |
143 | /* | |
144 | * In 32-bits mode, if the [7:6] bits (mod bits of | |
145 | * ModRM) on the second byte are not 11b, it is | |
146 | * LDS or LES. | |
147 | */ | |
148 | if (X86_MODRM_MOD(b2) != 3) | |
149 | goto vex_end; | |
150 | } | |
151 | insn->vex_prefix.bytes[0] = b; | |
152 | insn->vex_prefix.bytes[1] = b2; | |
153 | if (inat_is_vex3_prefix(attr)) { | |
154 | b2 = peek_nbyte_next(insn_byte_t, insn, 2); | |
155 | insn->vex_prefix.bytes[2] = b2; | |
156 | insn->vex_prefix.nbytes = 3; | |
157 | insn->next_byte += 3; | |
158 | if (insn->x86_64 && X86_VEX_W(b2)) | |
159 | /* VEX.W overrides opnd_size */ | |
160 | insn->opnd_bytes = 8; | |
161 | } else { | |
162 | insn->vex_prefix.nbytes = 2; | |
163 | insn->next_byte += 2; | |
164 | } | |
165 | } | |
166 | vex_end: | |
167 | insn->vex_prefix.got = 1; | |
168 | ||
eb13296c | 169 | prefixes->got = 1; |
53a019a9 MH |
170 | |
171 | err_out: | |
eb13296c MH |
172 | return; |
173 | } | |
174 | ||
175 | /** | |
176 | * insn_get_opcode - collect opcode(s) | |
177 | * @insn: &struct insn containing instruction | |
178 | * | |
179 | * Populates @insn->opcode, updates @insn->next_byte to point past the | |
180 | * opcode byte(s), and set @insn->attr (except for groups). | |
181 | * If necessary, first collects any preceding (prefix) bytes. | |
182 | * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got | |
183 | * is already 1. | |
184 | */ | |
185 | void insn_get_opcode(struct insn *insn) | |
186 | { | |
187 | struct insn_field *opcode = &insn->opcode; | |
188 | insn_byte_t op, pfx; | |
189 | if (opcode->got) | |
190 | return; | |
191 | if (!insn->prefixes.got) | |
192 | insn_get_prefixes(insn); | |
193 | ||
194 | /* Get first opcode */ | |
195 | op = get_next(insn_byte_t, insn); | |
196 | opcode->bytes[0] = op; | |
197 | opcode->nbytes = 1; | |
e0e492e9 MH |
198 | |
199 | /* Check if there is VEX prefix or not */ | |
200 | if (insn_is_avx(insn)) { | |
201 | insn_byte_t m, p; | |
202 | m = insn_vex_m_bits(insn); | |
203 | p = insn_vex_p_bits(insn); | |
204 | insn->attr = inat_get_avx_attribute(op, m, p); | |
130b78b2 | 205 | if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) |
e0e492e9 MH |
206 | insn->attr = 0; /* This instruction is bad */ |
207 | goto end; /* VEX has only 1 byte for opcode */ | |
208 | } | |
209 | ||
eb13296c MH |
210 | insn->attr = inat_get_opcode_attribute(op); |
211 | while (inat_is_escape(insn->attr)) { | |
212 | /* Get escaped opcode */ | |
213 | op = get_next(insn_byte_t, insn); | |
214 | opcode->bytes[opcode->nbytes++] = op; | |
215 | pfx = insn_last_prefix(insn); | |
216 | insn->attr = inat_get_escape_attribute(op, pfx, insn->attr); | |
217 | } | |
e0e492e9 MH |
218 | if (inat_must_vex(insn->attr)) |
219 | insn->attr = 0; /* This instruction is bad */ | |
220 | end: | |
eb13296c | 221 | opcode->got = 1; |
53a019a9 MH |
222 | |
223 | err_out: | |
224 | return; | |
eb13296c MH |
225 | } |
226 | ||
227 | /** | |
228 | * insn_get_modrm - collect ModRM byte, if any | |
229 | * @insn: &struct insn containing instruction | |
230 | * | |
231 | * Populates @insn->modrm and updates @insn->next_byte to point past the | |
232 | * ModRM byte, if any. If necessary, first collects the preceding bytes | |
233 | * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. | |
234 | */ | |
235 | void insn_get_modrm(struct insn *insn) | |
236 | { | |
237 | struct insn_field *modrm = &insn->modrm; | |
238 | insn_byte_t pfx, mod; | |
239 | if (modrm->got) | |
240 | return; | |
241 | if (!insn->opcode.got) | |
242 | insn_get_opcode(insn); | |
243 | ||
244 | if (inat_has_modrm(insn->attr)) { | |
245 | mod = get_next(insn_byte_t, insn); | |
246 | modrm->value = mod; | |
247 | modrm->nbytes = 1; | |
248 | if (inat_is_group(insn->attr)) { | |
249 | pfx = insn_last_prefix(insn); | |
250 | insn->attr = inat_get_group_attribute(mod, pfx, | |
251 | insn->attr); | |
130b78b2 MH |
252 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) |
253 | insn->attr = 0; /* This is bad */ | |
eb13296c MH |
254 | } |
255 | } | |
256 | ||
257 | if (insn->x86_64 && inat_is_force64(insn->attr)) | |
258 | insn->opnd_bytes = 8; | |
259 | modrm->got = 1; | |
53a019a9 MH |
260 | |
261 | err_out: | |
262 | return; | |
eb13296c MH |
263 | } |
264 | ||
265 | ||
266 | /** | |
267 | * insn_rip_relative() - Does instruction use RIP-relative addressing mode? | |
268 | * @insn: &struct insn containing instruction | |
269 | * | |
270 | * If necessary, first collects the instruction up to and including the | |
271 | * ModRM byte. No effect if @insn->x86_64 is 0. | |
272 | */ | |
273 | int insn_rip_relative(struct insn *insn) | |
274 | { | |
275 | struct insn_field *modrm = &insn->modrm; | |
276 | ||
277 | if (!insn->x86_64) | |
278 | return 0; | |
279 | if (!modrm->got) | |
280 | insn_get_modrm(insn); | |
281 | /* | |
282 | * For rip-relative instructions, the mod field (top 2 bits) | |
283 | * is zero and the r/m field (bottom 3 bits) is 0x5. | |
284 | */ | |
285 | return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); | |
286 | } | |
287 | ||
288 | /** | |
289 | * insn_get_sib() - Get the SIB byte of instruction | |
290 | * @insn: &struct insn containing instruction | |
291 | * | |
292 | * If necessary, first collects the instruction up to and including the | |
293 | * ModRM byte. | |
294 | */ | |
295 | void insn_get_sib(struct insn *insn) | |
296 | { | |
297 | insn_byte_t modrm; | |
298 | ||
299 | if (insn->sib.got) | |
300 | return; | |
301 | if (!insn->modrm.got) | |
302 | insn_get_modrm(insn); | |
303 | if (insn->modrm.nbytes) { | |
304 | modrm = (insn_byte_t)insn->modrm.value; | |
305 | if (insn->addr_bytes != 2 && | |
306 | X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { | |
307 | insn->sib.value = get_next(insn_byte_t, insn); | |
308 | insn->sib.nbytes = 1; | |
309 | } | |
310 | } | |
311 | insn->sib.got = 1; | |
53a019a9 MH |
312 | |
313 | err_out: | |
314 | return; | |
eb13296c MH |
315 | } |
316 | ||
317 | ||
318 | /** | |
319 | * insn_get_displacement() - Get the displacement of instruction | |
320 | * @insn: &struct insn containing instruction | |
321 | * | |
322 | * If necessary, first collects the instruction up to and including the | |
323 | * SIB byte. | |
324 | * Displacement value is sign-expanded. | |
325 | */ | |
326 | void insn_get_displacement(struct insn *insn) | |
327 | { | |
328 | insn_byte_t mod, rm, base; | |
329 | ||
330 | if (insn->displacement.got) | |
331 | return; | |
332 | if (!insn->sib.got) | |
333 | insn_get_sib(insn); | |
334 | if (insn->modrm.nbytes) { | |
335 | /* | |
336 | * Interpreting the modrm byte: | |
337 | * mod = 00 - no displacement fields (exceptions below) | |
338 | * mod = 01 - 1-byte displacement field | |
339 | * mod = 10 - displacement field is 4 bytes, or 2 bytes if | |
340 | * address size = 2 (0x67 prefix in 32-bit mode) | |
341 | * mod = 11 - no memory operand | |
342 | * | |
343 | * If address size = 2... | |
344 | * mod = 00, r/m = 110 - displacement field is 2 bytes | |
345 | * | |
346 | * If address size != 2... | |
347 | * mod != 11, r/m = 100 - SIB byte exists | |
348 | * mod = 00, SIB base = 101 - displacement field is 4 bytes | |
349 | * mod = 00, r/m = 101 - rip-relative addressing, displacement | |
350 | * field is 4 bytes | |
351 | */ | |
352 | mod = X86_MODRM_MOD(insn->modrm.value); | |
353 | rm = X86_MODRM_RM(insn->modrm.value); | |
354 | base = X86_SIB_BASE(insn->sib.value); | |
355 | if (mod == 3) | |
356 | goto out; | |
357 | if (mod == 1) { | |
358 | insn->displacement.value = get_next(char, insn); | |
359 | insn->displacement.nbytes = 1; | |
360 | } else if (insn->addr_bytes == 2) { | |
361 | if ((mod == 0 && rm == 6) || mod == 2) { | |
362 | insn->displacement.value = | |
363 | get_next(short, insn); | |
364 | insn->displacement.nbytes = 2; | |
365 | } | |
366 | } else { | |
367 | if ((mod == 0 && rm == 5) || mod == 2 || | |
368 | (mod == 0 && base == 5)) { | |
369 | insn->displacement.value = get_next(int, insn); | |
370 | insn->displacement.nbytes = 4; | |
371 | } | |
372 | } | |
373 | } | |
374 | out: | |
375 | insn->displacement.got = 1; | |
53a019a9 MH |
376 | |
377 | err_out: | |
378 | return; | |
eb13296c MH |
379 | } |
380 | ||
381 | /* Decode moffset16/32/64 */ | |
382 | static void __get_moffset(struct insn *insn) | |
383 | { | |
384 | switch (insn->addr_bytes) { | |
385 | case 2: | |
386 | insn->moffset1.value = get_next(short, insn); | |
387 | insn->moffset1.nbytes = 2; | |
388 | break; | |
389 | case 4: | |
390 | insn->moffset1.value = get_next(int, insn); | |
391 | insn->moffset1.nbytes = 4; | |
392 | break; | |
393 | case 8: | |
394 | insn->moffset1.value = get_next(int, insn); | |
395 | insn->moffset1.nbytes = 4; | |
396 | insn->moffset2.value = get_next(int, insn); | |
397 | insn->moffset2.nbytes = 4; | |
398 | break; | |
399 | } | |
400 | insn->moffset1.got = insn->moffset2.got = 1; | |
53a019a9 MH |
401 | |
402 | err_out: | |
403 | return; | |
eb13296c MH |
404 | } |
405 | ||
406 | /* Decode imm v32(Iz) */ | |
407 | static void __get_immv32(struct insn *insn) | |
408 | { | |
409 | switch (insn->opnd_bytes) { | |
410 | case 2: | |
411 | insn->immediate.value = get_next(short, insn); | |
412 | insn->immediate.nbytes = 2; | |
413 | break; | |
414 | case 4: | |
415 | case 8: | |
416 | insn->immediate.value = get_next(int, insn); | |
417 | insn->immediate.nbytes = 4; | |
418 | break; | |
419 | } | |
53a019a9 MH |
420 | |
421 | err_out: | |
422 | return; | |
eb13296c MH |
423 | } |
424 | ||
425 | /* Decode imm v64(Iv/Ov) */ | |
426 | static void __get_immv(struct insn *insn) | |
427 | { | |
428 | switch (insn->opnd_bytes) { | |
429 | case 2: | |
430 | insn->immediate1.value = get_next(short, insn); | |
431 | insn->immediate1.nbytes = 2; | |
432 | break; | |
433 | case 4: | |
434 | insn->immediate1.value = get_next(int, insn); | |
435 | insn->immediate1.nbytes = 4; | |
436 | break; | |
437 | case 8: | |
438 | insn->immediate1.value = get_next(int, insn); | |
439 | insn->immediate1.nbytes = 4; | |
440 | insn->immediate2.value = get_next(int, insn); | |
441 | insn->immediate2.nbytes = 4; | |
442 | break; | |
443 | } | |
444 | insn->immediate1.got = insn->immediate2.got = 1; | |
53a019a9 MH |
445 | |
446 | err_out: | |
447 | return; | |
eb13296c MH |
448 | } |
449 | ||
450 | /* Decode ptr16:16/32(Ap) */ | |
451 | static void __get_immptr(struct insn *insn) | |
452 | { | |
453 | switch (insn->opnd_bytes) { | |
454 | case 2: | |
455 | insn->immediate1.value = get_next(short, insn); | |
456 | insn->immediate1.nbytes = 2; | |
457 | break; | |
458 | case 4: | |
459 | insn->immediate1.value = get_next(int, insn); | |
460 | insn->immediate1.nbytes = 4; | |
461 | break; | |
462 | case 8: | |
463 | /* ptr16:64 is not exist (no segment) */ | |
464 | return; | |
465 | } | |
466 | insn->immediate2.value = get_next(unsigned short, insn); | |
467 | insn->immediate2.nbytes = 2; | |
468 | insn->immediate1.got = insn->immediate2.got = 1; | |
53a019a9 MH |
469 | |
470 | err_out: | |
471 | return; | |
eb13296c MH |
472 | } |
473 | ||
474 | /** | |
475 | * insn_get_immediate() - Get the immediates of instruction | |
476 | * @insn: &struct insn containing instruction | |
477 | * | |
478 | * If necessary, first collects the instruction up to and including the | |
479 | * displacement bytes. | |
480 | * Basically, most of immediates are sign-expanded. Unsigned-value can be | |
481 | * get by bit masking with ((1 << (nbytes * 8)) - 1) | |
482 | */ | |
483 | void insn_get_immediate(struct insn *insn) | |
484 | { | |
485 | if (insn->immediate.got) | |
486 | return; | |
487 | if (!insn->displacement.got) | |
488 | insn_get_displacement(insn); | |
489 | ||
490 | if (inat_has_moffset(insn->attr)) { | |
491 | __get_moffset(insn); | |
492 | goto done; | |
493 | } | |
494 | ||
495 | if (!inat_has_immediate(insn->attr)) | |
496 | /* no immediates */ | |
497 | goto done; | |
498 | ||
499 | switch (inat_immediate_size(insn->attr)) { | |
500 | case INAT_IMM_BYTE: | |
501 | insn->immediate.value = get_next(char, insn); | |
502 | insn->immediate.nbytes = 1; | |
503 | break; | |
504 | case INAT_IMM_WORD: | |
505 | insn->immediate.value = get_next(short, insn); | |
506 | insn->immediate.nbytes = 2; | |
507 | break; | |
508 | case INAT_IMM_DWORD: | |
509 | insn->immediate.value = get_next(int, insn); | |
510 | insn->immediate.nbytes = 4; | |
511 | break; | |
512 | case INAT_IMM_QWORD: | |
513 | insn->immediate1.value = get_next(int, insn); | |
514 | insn->immediate1.nbytes = 4; | |
515 | insn->immediate2.value = get_next(int, insn); | |
516 | insn->immediate2.nbytes = 4; | |
517 | break; | |
518 | case INAT_IMM_PTR: | |
519 | __get_immptr(insn); | |
520 | break; | |
521 | case INAT_IMM_VWORD32: | |
522 | __get_immv32(insn); | |
523 | break; | |
524 | case INAT_IMM_VWORD: | |
525 | __get_immv(insn); | |
526 | break; | |
527 | default: | |
528 | break; | |
529 | } | |
530 | if (inat_has_second_immediate(insn->attr)) { | |
531 | insn->immediate2.value = get_next(char, insn); | |
532 | insn->immediate2.nbytes = 1; | |
533 | } | |
534 | done: | |
535 | insn->immediate.got = 1; | |
53a019a9 MH |
536 | |
537 | err_out: | |
538 | return; | |
eb13296c MH |
539 | } |
540 | ||
541 | /** | |
542 | * insn_get_length() - Get the length of instruction | |
543 | * @insn: &struct insn containing instruction | |
544 | * | |
545 | * If necessary, first collects the instruction up to and including the | |
546 | * immediates bytes. | |
547 | */ | |
548 | void insn_get_length(struct insn *insn) | |
549 | { | |
550 | if (insn->length) | |
551 | return; | |
552 | if (!insn->immediate.got) | |
553 | insn_get_immediate(insn); | |
554 | insn->length = (unsigned char)((unsigned long)insn->next_byte | |
555 | - (unsigned long)insn->kaddr); | |
556 | } |