]>
Commit | Line | Data |
---|---|---|
eb13296c MH |
1 | #!/bin/awk -f |
2 | # gen-insn-attr-x86.awk: Instruction attribute table generator | |
3 | # Written by Masami Hiramatsu <mhiramat@redhat.com> | |
4 | # | |
5 | # Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c | |
6 | ||
69d991f3 MH |
# Sanity-check the awk implementation running this script.
# Returns "" when sprintf("%x", 0) yields "0" as expected, otherwise a
# human-readable description of the problem.
function check_awk_implement() {
	if (sprintf("%x", 0) == "0")
		return ""
	return "Your awk has a printf-format problem."
}
13 | ||
e0e492e9 MH |
# Reset the per-table working state: the current table name, the
# escape/group/AVX ids (-1 means "not set") and the four attribute arrays.
function clear_vars() {
	tname = ""
	eid = -1	# escape id
	gid = -1	# group id
	aid = -1	# AVX id
	delete table
	delete lptable1
	delete lptable2
	delete lptable3
}
25 | ||
# Start-up: verify the awk implementation, emit the generated-file banner,
# and initialise the id counters, the regular expressions and the lookup
# tables that the per-line rules rely on.
BEGIN {
	# Implementation error checking
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Setup generating tables
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"
	ggid = 1	# next group id to hand out
	geid = 1	# next escape id to hand out
	gaid = 0	# one past the highest AVX id seen so far
	delete etable
	delete gtable
	delete atable

	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	imm_expr = "^[IJAOL][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"

	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	# Last-prefix superscripts. '&' is an ordinary character in a regular
	# expression, so it is written unescaped: "\\&" yields the unknown
	# escape "\&", which newer gawk versions warn about.
	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
	lprefix_expr = "\\((66|F2|F3)\\)"
	# Number of per-opcode table variants: index 0 is the plain table,
	# indexes 1-3 are the lptable1..lptable3 variants.
	max_lprefix = 4

	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
	# accept the VEX prefix
	vexok_opcode_expr = "^[vk].*"
	vexok_expr = "\\(v1\\)"
	# All opcodes with (v) superscript support *only* the VEX prefix
	vexonly_expr = "\\(v\\)"
	# All opcodes with (ev) superscript support *only* the EVEX prefix
	evexonly_expr = "\\(ev\\)"

	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"

	clear_vars()
}
104 | ||
# Report a problem in the input, tagged with the current record number,
# on stderr and terminate with exit status 1.
function semantic_error(msg) {
	print ("Semantic error at " NR ": " msg) > "/dev/stderr"
	exit 1
}
109 | ||
# Print a debugging message.
# The message goes to stderr because stdout carries the generated C source
# (see the usage line at the top of this file); writing diagnostics there
# would corrupt the generated tables.
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}
113 | ||
# Return the number of elements stored in arr.
# i and c are local scratch variables, not arguments.
function array_size(arr, i,c) {
	c = 0
	for (i in arr) {
		c += 1
	}
	return c
}
120 | ||
# "Table:" header: echo the line into the output as a C comment and
# complain if the previous table is still open (tname is only reset by
# clear_vars()).
/^Table:/ {
	print "/* " $0 " */"
	if (!(tname == "")) {
		semantic_error("Hit Table: before EndTable:.");
	}
}
126 | ||
# "Referrer:" header: a non-empty referrer names the escape opcode that
# leads to this table. Look up its escape id and derive the table name.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		# The lookup key is every field after "Referrer:" joined without
		# spaces, matching how the escape names are stored.
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An unknown referrer would otherwise silently read an empty id
		# and produce "inat_escape_table_0"; fail loudly instead, the same
		# way GrpTable: does for an unknown group.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
137 | ||
e0e492e9 MH |
# "AVXcode:" header: a non-empty value is the AVX id of this table. When
# neither an AVX id nor an escape id has been set, this is the primary
# opcode table.
/^AVXcode:/ {
	if (NF != 1) {
		# AVX/escape opcode table
		aid = $2
		# No name chosen yet means this is an AVX only opcode table
		if (tname == "")
			tname = sprintf("inat_avx_table_%d", aid)
		# keep gaid one past the largest AVX id seen
		if (aid >= gaid)
			gaid = aid + 1
	}
	if (eid == -1 && aid == -1) {
		# primary opcode table
		tname = "inat_primary_table"
	}
}
150 | ||
eb13296c MH |
# "GrpTable:" header: echo the line as a C comment, then select the group
# id recorded for the group named in the second field. The group must
# already be known.
/^GrpTable:/ {
	print "/* " $0 " */"
	if ($2 in group) {
		gid = group[$2]
		tname = "inat_group_table_" gid
	} else {
		semantic_error("No group: " $2 )
	}
}
158 | ||
# Emit one C array definition.
#   tbl  - attribute strings keyed by the formatted index
#   name - array declarator, e.g. "foo[SIZE]"
#   fmt  - sprintf format that turns a numeric index into a key of tbl
#   n    - number of indexes to scan (0 .. n-1)
# Only non-empty entries are printed, as designated initializers.
# i and id are local scratch variables (not arguments), so the loop does
# not clobber the global i used by the per-line rules.
function print_table(tbl,name,fmt,n, i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		# Test membership first: merely referencing tbl[id] would
		# create an empty element for every missing index.
		if ((id in tbl) && tbl[id])
			print " [" id "] = " tbl[id] ","
	}
	print "};"
}
169 | ||
# Print one variant of the current table and record its name.
#   tbl - table (variant 0) or lptable1..lptable3
#   k   - variant index 0..3; variants 1-3 get a "_<k>" name suffix
# Nothing is printed for an empty array. Group tables (gid set) are
# registered in gtable; all others are registered in etable and, when an
# AVX id is set, in atable as well.
function emit_table(tbl, k, sfx)
{
	if (array_size(tbl) == 0)
		return
	sfx = (k == 0) ? "" : "_" k
	if (gid != -1) {
		# group tables
		print_table(tbl, tname sfx "[INAT_GROUP_TABLE_SIZE]",
			    "0x%x", 8)
		gtable[gid,k] = tname sfx
	} else {
		# primary/escaped tables
		print_table(tbl, tname sfx "[INAT_OPCODE_TABLE_SIZE]",
			    "0x%02x", 256)
		etable[eid,k] = tname sfx
		if (aid >= 0)
			atable[aid,k] = tname sfx
	}
}

# "EndTable": flush the plain table and its three last-prefix variants,
# emit a blank separator line and reset the working state for the next
# table.
/^EndTable/ {
	emit_table(table, 0)
	emit_table(lptable1, 1)
	emit_table(lptable2, 2)
	emit_table(lptable3, 3)
	print ""
	clear_vars()
}
227 | ||
# Combine two flag expressions with " | ".
# If either side is empty the other one is returned unchanged.
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
236 | ||
# Convert an operand list into an attribute flag expression.
#   count - number of operands in opnd
#   opnd  - operands, indexed 1..count
# Returns the immediate flag (if any) combined with INAT_MODRM (if any
# operand matches modrm_expr). i, j, imm and mod are local scratch
# variables.
function convert_operands(count,opnd, i,j,imm,mod)
{
	imm = null
	mod = null
	for (j = 1; j <= count; j++) {
		i = opnd[j]
		if (match(i, imm_expr) != 1) {
			# not an immediate; it may still need a ModRM byte
			if (match(i, modrm_expr))
				mod = "INAT_MODRM"
			continue
		}
		if (!imm_flag[i])
			semantic_error("Unknown imm opnd: " i)
		if (!imm) {
			# first immediate operand
			imm = imm_flag[i]
			continue
		}
		# a second immediate is only accepted when it is "Ib"
		if (i != "Ib")
			semantic_error("Second IMM error")
		imm = add_flags(imm, "INAT_SCNDIMM")
	}
	return add_flags(imm, mod)
}
258 | ||
# Opcode line: "<hex index>: <opcode> [operands] [(ext)] [| <opcode> ...]".
# Builds the attribute flags for every '|'-separated alternative and stores
# them in table and/or the last-prefix tables under the index.
# The ':' in the pattern is an ordinary regexp character and is written
# unescaped; "\:" is an unknown escape that newer gawk versions warn about.
/^[0-9a-f]+:/ {
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the escape name is every field after '#', joined without
		# spaces; the Referrer: rule builds its lookup key the same way
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			# first use of a group name allocates its id
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes
		if (match(ext, evexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# nothing to record for this alternative
		if (length(flags) == 0)
			continue
		# check if last prefix
		# The three tests are independent, so one alternative may land
		# in several last-prefix tables.
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		# no (66), (F2) or (F3) superscript: also goes into the plain table
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# mark the plain entry when any last-prefix variant was recorded
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
360 | ||
# Print one array that maps [id][last-prefix variant] to a table name.
#   title - text of the C comment printed above the array
#   decl  - array name including its first dimension
#   map   - table names keyed by (id, variant)
#   rows  - number of ids to scan (0 .. rows-1)
#   tail  - text appended to the closing "};"
# r and c are local scratch variables.
function print_pointer_map(title, decl, map, rows, tail, r, c)
{
	print "/* " title " */"
	print "const insn_attr_t * const " decl "[INAT_LSTPFX_MAX + 1] = {"
	for (r = 0; r < rows; r++)
		for (c = 0; c < max_lprefix; c++)
			if (map[r,c])
				print " ["r"]["c"] = "map[r,c]","
	print "};" tail
}

# After all input: emit the escape, group and AVX lookup arrays.
END {
	# The BEGIN block already reported the problem and requested exit 1.
	if (awkchecked != "")
		exit 1
	# print escape opcode map's array
	print_pointer_map("Escape opcode map array",
			  "inat_escape_tables[INAT_ESC_MAX + 1]",
			  etable, geid, "\n")
	# print group opcode map's array
	print_pointer_map("Group opcode map array",
			  "inat_group_tables[INAT_GRP_MAX + 1]",
			  gtable, ggid, "\n")
	# print AVX opcode map's array
	print_pointer_map("AVX opcode map array",
			  "inat_avx_tables[X86_VEX_M_MAX + 1]",
			  atable, gaid, "")
}
e0e492e9 | 392 |