]>
Commit | Line | Data |
---|---|---|
3d004a37 | 1 | #!/usr/bin/env python3 |
568ae7ef RH |
2 | # Copyright (c) 2018 Linaro Limited |
3 | # | |
4 | # This library is free software; you can redistribute it and/or | |
5 | # modify it under the terms of the GNU Lesser General Public | |
6 | # License as published by the Free Software Foundation; either | |
d6ea4236 | 7 | # version 2.1 of the License, or (at your option) any later version. |
568ae7ef RH |
8 | # |
9 | # This library is distributed in the hope that it will be useful, | |
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | # Lesser General Public License for more details. | |
13 | # | |
14 | # You should have received a copy of the GNU Lesser General Public | |
15 | # License along with this library; if not, see <http://www.gnu.org/licenses/>. | |
16 | # | |
17 | ||
18 | # | |
19 | # Generate a decoding tree from a specification file. | |
3fdbf5d6 | 20 | # See the syntax and semantics in docs/devel/decodetree.rst. |
568ae7ef RH |
21 | # |
22 | ||
4cacecaa | 23 | import io |
568ae7ef RH |
24 | import os |
25 | import re | |
26 | import sys | |
27 | import getopt | |
568ae7ef RH |
28 | |
29 | insnwidth = 32 | |
60c425f3 | 30 | bitop_width = 32 |
568ae7ef | 31 | insnmask = 0xffffffff |
17560e93 | 32 | variablewidth = False |
568ae7ef RH |
33 | fields = {} |
34 | arguments = {} | |
35 | formats = {} | |
0eff2df4 | 36 | allpatterns = [] |
c6920795 | 37 | anyextern = False |
9b5acc56 | 38 | testforerror = False |
568ae7ef RH |
39 | |
40 | translate_prefix = 'trans' | |
41 | translate_scope = 'static ' | |
42 | input_file = '' | |
43 | output_file = None | |
44 | output_fd = None | |
c6a5fc2a | 45 | output_null = False |
568ae7ef | 46 | insntype = 'uint32_t' |
abd04f92 | 47 | decode_function = 'decode' |
568ae7ef | 48 | |
acfdd239 RH |
49 | # An identifier for C. |
50 | re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*' | |
568ae7ef | 51 | |
acfdd239 RH |
52 | # Identifiers for Arguments, Fields, Formats and Patterns. |
53 | re_arg_ident = '&[a-zA-Z0-9_]*' | |
54 | re_fld_ident = '%[a-zA-Z0-9_]*' | |
55 | re_fmt_ident = '@[a-zA-Z0-9_]*' | |
56 | re_pat_ident = '[a-zA-Z0-9_]*' | |
568ae7ef | 57 | |
36d61244 PM |
58 | # Local implementation of a topological sort. We use the same API that |
59 | # the Python graphlib does, so that when QEMU moves forward to a | |
60 | # baseline of Python 3.9 or newer this code can all be dropped and | |
61 | # replaced with: | |
62 | # from graphlib import TopologicalSorter, CycleError | |
63 | # | |
64 | # https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter | |
65 | # | |
66 | # We only implement the parts of TopologicalSorter we care about: | |
67 | # ts = TopologicalSorter(graph=None) | |
68 | # create the sorter. graph is a dictionary whose keys are | |
69 | # nodes and whose values are lists of the predecessors of that node. | |
70 | # (That is, if graph contains "A" -> ["B", "C"] then we must output | |
71 | # B and C before A.) | |
72 | # ts.static_order() | |
73 | # returns a list of all the nodes in sorted order, or raises CycleError | |
74 | # CycleError | |
75 | # exception raised if there are cycles in the graph. The second | |
76 | # element in the args attribute is a list of nodes which form a | |
77 | # cycle; the first and last element are the same, eg [a, b, c, a] | |
78 | # (Our implementation doesn't give the order correctly.) | |
79 | # | |
80 | # For our purposes we can assume that the data set is always small | |
81 | # (typically 10 nodes or less, actual links in the graph very rare), | |
82 | # so we don't need to worry about efficiency of implementation. | |
83 | # | |
84 | # The core of this implementation is from | |
85 | # https://code.activestate.com/recipes/578272-topological-sort/ | |
86 | # (but updated to Python 3), and is under the MIT license. | |
87 | ||
88 | class CycleError(ValueError): | |
89 | """Subclass of ValueError raised if cycles exist in the graph""" | |
90 | pass | |
91 | ||
92 | class TopologicalSorter: | |
93 | """Topologically sort a graph""" | |
94 | def __init__(self, graph=None): | |
95 | self.graph = graph | |
96 | ||
97 | def static_order(self): | |
98 | # We do the sort right here, unlike the stdlib version | |
99 | from functools import reduce | |
100 | data = {} | |
101 | r = [] | |
102 | ||
103 | if not self.graph: | |
104 | return [] | |
105 | ||
106 | # This code wants the values in the dict to be specifically sets | |
107 | for k, v in self.graph.items(): | |
108 | data[k] = set(v) | |
109 | ||
110 | # Find all items that don't depend on anything. | |
111 | extra_items_in_deps = (reduce(set.union, data.values()) | |
112 | - set(data.keys())) | |
113 | # Add empty dependencies where needed | |
114 | data.update({item:{} for item in extra_items_in_deps}) | |
115 | while True: | |
116 | ordered = set(item for item, dep in data.items() if not dep) | |
117 | if not ordered: | |
118 | break | |
119 | r.extend(ordered) | |
120 | data = {item: (dep - ordered) | |
121 | for item, dep in data.items() | |
122 | if item not in ordered} | |
123 | if data: | |
124 | # This doesn't give as nice results as the stdlib, which | |
125 | # gives you the cycle by listing the nodes in order. Here | |
126 | # we only know the nodes in the cycle but not their order. | |
127 | raise CycleError(f'nodes are in a cycle', list(data.keys())) | |
128 | ||
129 | return r | |
130 | # end TopologicalSorter | |
131 | ||
6699ae6a | 132 | def error_with_file(file, lineno, *args): |
568ae7ef RH |
133 | """Print an error message from file:line and args and exit.""" |
134 | global output_file | |
135 | global output_fd | |
136 | ||
78cc9034 PM |
137 | # For the test suite expected-errors case, don't print the |
138 | # string "error: ", so they don't turn up as false positives | |
139 | # if you grep the meson logs for strings like that. | |
140 | end = 'error: ' if not testforerror else 'detected: ' | |
2fd51b19 RH |
141 | prefix = '' |
142 | if file: | |
9f6e2b4d | 143 | prefix += f'{file}:' |
568ae7ef | 144 | if lineno: |
9f6e2b4d | 145 | prefix += f'{lineno}:' |
2fd51b19 RH |
146 | if prefix: |
147 | prefix += ' ' | |
78cc9034 | 148 | print(prefix, end=end, file=sys.stderr) |
2fd51b19 RH |
149 | print(*args, file=sys.stderr) |
150 | ||
568ae7ef RH |
151 | if output_file and output_fd: |
152 | output_fd.close() | |
c6a5fc2a | 153 | os.remove(output_file) |
9b5acc56 | 154 | exit(0 if testforerror else 1) |
2fd51b19 RH |
155 | # end error_with_file |
156 | ||
568ae7ef | 157 | |
6699ae6a | 158 | def error(lineno, *args): |
2fd51b19 RH |
159 | error_with_file(input_file, lineno, *args) |
160 | # end error | |
161 | ||
568ae7ef RH |
162 | |
163 | def output(*args): | |
164 | global output_fd | |
165 | for a in args: | |
166 | output_fd.write(a) | |
167 | ||
168 | ||
568ae7ef RH |
169 | def output_autogen(): |
170 | output('/* This file is autogenerated by scripts/decodetree.py. */\n\n') | |
171 | ||
172 | ||
173 | def str_indent(c): | |
174 | """Return a string with C spaces""" | |
175 | return ' ' * c | |
176 | ||
177 | ||
178 | def str_fields(fields): | |
65fdb3cc | 179 | """Return a string uniquely identifying FIELDS""" |
568ae7ef RH |
180 | r = '' |
181 | for n in sorted(fields.keys()): | |
182 | r += '_' + n | |
183 | return r[1:] | |
184 | ||
185 | ||
c7cefe6c RH |
186 | def whex(val): |
187 | """Return a hex string for val padded for insnwidth""" | |
188 | global insnwidth | |
189 | return f'0x{val:0{insnwidth // 4}x}' | |
190 | ||
191 | ||
192 | def whexC(val): | |
193 | """Return a hex string for val padded for insnwidth, | |
194 | and with the proper suffix for a C constant.""" | |
195 | suffix = '' | |
60c425f3 LFFP |
196 | if val >= 0x100000000: |
197 | suffix = 'ull' | |
198 | elif val >= 0x80000000: | |
c7cefe6c RH |
199 | suffix = 'u' |
200 | return whex(val) + suffix | |
201 | ||
202 | ||
568ae7ef RH |
203 | def str_match_bits(bits, mask): |
204 | """Return a string pretty-printing BITS/MASK""" | |
205 | global insnwidth | |
206 | ||
207 | i = 1 << (insnwidth - 1) | |
208 | space = 0x01010100 | |
209 | r = '' | |
210 | while i != 0: | |
211 | if i & mask: | |
212 | if i & bits: | |
213 | r += '1' | |
214 | else: | |
215 | r += '0' | |
216 | else: | |
217 | r += '.' | |
218 | if i & space: | |
219 | r += ' ' | |
220 | i >>= 1 | |
221 | return r | |
222 | ||
223 | ||
224 | def is_pow2(x): | |
225 | """Return true iff X is equal to a power of 2.""" | |
226 | return (x & (x - 1)) == 0 | |
227 | ||
228 | ||
229 | def ctz(x): | |
230 | """Return the number of times 2 factors into X.""" | |
b44b3449 | 231 | assert x != 0 |
568ae7ef RH |
232 | r = 0 |
233 | while ((x >> r) & 1) == 0: | |
234 | r += 1 | |
235 | return r | |
236 | ||
237 | ||
238 | def is_contiguous(bits): | |
b44b3449 RH |
239 | if bits == 0: |
240 | return -1 | |
568ae7ef RH |
241 | shift = ctz(bits) |
242 | if is_pow2((bits >> shift) + 1): | |
243 | return shift | |
244 | else: | |
245 | return -1 | |
246 | ||
247 | ||
af93ccac RH |
248 | def eq_fields_for_args(flds_a, arg): |
249 | if len(flds_a) != len(arg.fields): | |
568ae7ef | 250 | return False |
af93ccac RH |
251 | # Only allow inference on default types |
252 | for t in arg.types: | |
253 | if t != 'int': | |
254 | return False | |
568ae7ef | 255 | for k, a in flds_a.items(): |
af93ccac | 256 | if k not in arg.fields: |
568ae7ef RH |
257 | return False |
258 | return True | |
259 | ||
260 | ||
261 | def eq_fields_for_fmts(flds_a, flds_b): | |
262 | if len(flds_a) != len(flds_b): | |
263 | return False | |
264 | for k, a in flds_a.items(): | |
265 | if k not in flds_b: | |
266 | return False | |
267 | b = flds_b[k] | |
268 | if a.__class__ != b.__class__ or a != b: | |
269 | return False | |
270 | return True | |
271 | ||
272 | ||
273 | class Field: | |
274 | """Class representing a simple instruction field""" | |
275 | def __init__(self, sign, pos, len): | |
276 | self.sign = sign | |
277 | self.pos = pos | |
278 | self.len = len | |
279 | self.mask = ((1 << len) - 1) << pos | |
280 | ||
281 | def __str__(self): | |
282 | if self.sign: | |
283 | s = 's' | |
284 | else: | |
285 | s = '' | |
cbcdf1a9 | 286 | return str(self.pos) + ':' + s + str(self.len) |
568ae7ef | 287 | |
aeac22ba | 288 | def str_extract(self, lvalue_formatter): |
60c425f3 LFFP |
289 | global bitop_width |
290 | s = 's' if self.sign else '' | |
291 | return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})' | |
568ae7ef | 292 | |
7e6c28be PM |
293 | def referenced_fields(self): |
294 | return [] | |
295 | ||
568ae7ef | 296 | def __eq__(self, other): |
2c7d4427 | 297 | return self.sign == other.sign and self.mask == other.mask |
568ae7ef RH |
298 | |
299 | def __ne__(self, other): | |
300 | return not self.__eq__(other) | |
301 | # end Field | |
302 | ||
303 | ||
304 | class MultiField: | |
305 | """Class representing a compound instruction field""" | |
306 | def __init__(self, subs, mask): | |
307 | self.subs = subs | |
308 | self.sign = subs[0].sign | |
309 | self.mask = mask | |
310 | ||
311 | def __str__(self): | |
312 | return str(self.subs) | |
313 | ||
aeac22ba | 314 | def str_extract(self, lvalue_formatter): |
60c425f3 | 315 | global bitop_width |
568ae7ef RH |
316 | ret = '0' |
317 | pos = 0 | |
318 | for f in reversed(self.subs): | |
aeac22ba | 319 | ext = f.str_extract(lvalue_formatter) |
568ae7ef | 320 | if pos == 0: |
9f6e2b4d | 321 | ret = ext |
568ae7ef | 322 | else: |
60c425f3 | 323 | ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})' |
568ae7ef RH |
324 | pos += f.len |
325 | return ret | |
326 | ||
7e6c28be PM |
327 | def referenced_fields(self): |
328 | l = [] | |
329 | for f in self.subs: | |
330 | l.extend(f.referenced_fields()) | |
331 | return l | |
332 | ||
568ae7ef RH |
333 | def __ne__(self, other): |
334 | if len(self.subs) != len(other.subs): | |
335 | return True | |
336 | for a, b in zip(self.subs, other.subs): | |
337 | if a.__class__ != b.__class__ or a != b: | |
338 | return True | |
339 | return False | |
340 | ||
341 | def __eq__(self, other): | |
342 | return not self.__ne__(other) | |
343 | # end MultiField | |
344 | ||
345 | ||
346 | class ConstField: | |
347 | """Class representing an argument field with constant value""" | |
348 | def __init__(self, value): | |
349 | self.value = value | |
350 | self.mask = 0 | |
351 | self.sign = value < 0 | |
352 | ||
353 | def __str__(self): | |
354 | return str(self.value) | |
355 | ||
aeac22ba | 356 | def str_extract(self, lvalue_formatter): |
568ae7ef RH |
357 | return str(self.value) |
358 | ||
7e6c28be PM |
359 | def referenced_fields(self): |
360 | return [] | |
361 | ||
568ae7ef RH |
362 | def __cmp__(self, other): |
363 | return self.value - other.value | |
364 | # end ConstField | |
365 | ||
366 | ||
367 | class FunctionField: | |
94597b61 | 368 | """Class representing a field passed through a function""" |
568ae7ef RH |
369 | def __init__(self, func, base): |
370 | self.mask = base.mask | |
371 | self.sign = base.sign | |
372 | self.base = base | |
373 | self.func = func | |
374 | ||
375 | def __str__(self): | |
376 | return self.func + '(' + str(self.base) + ')' | |
377 | ||
aeac22ba PM |
378 | def str_extract(self, lvalue_formatter): |
379 | return (self.func + '(ctx, ' | |
380 | + self.base.str_extract(lvalue_formatter) + ')') | |
568ae7ef | 381 | |
7e6c28be PM |
382 | def referenced_fields(self): |
383 | return self.base.referenced_fields() | |
384 | ||
568ae7ef RH |
385 | def __eq__(self, other): |
386 | return self.func == other.func and self.base == other.base | |
387 | ||
388 | def __ne__(self, other): | |
389 | return not self.__eq__(other) | |
390 | # end FunctionField | |
391 | ||
392 | ||
94597b61 RH |
393 | class ParameterField: |
394 | """Class representing a pseudo-field read from a function""" | |
395 | def __init__(self, func): | |
396 | self.mask = 0 | |
397 | self.sign = 0 | |
398 | self.func = func | |
399 | ||
400 | def __str__(self): | |
401 | return self.func | |
402 | ||
aeac22ba | 403 | def str_extract(self, lvalue_formatter): |
94597b61 RH |
404 | return self.func + '(ctx)' |
405 | ||
7e6c28be PM |
406 | def referenced_fields(self): |
407 | return [] | |
408 | ||
94597b61 RH |
409 | def __eq__(self, other): |
410 | return self.func == other.func | |
411 | ||
412 | def __ne__(self, other): | |
413 | return not self.__eq__(other) | |
414 | # end ParameterField | |
415 | ||
7e6c28be PM |
416 | class NamedField: |
417 | """Class representing a field already named in the pattern""" | |
418 | def __init__(self, name, sign, len): | |
419 | self.mask = 0 | |
420 | self.sign = sign | |
421 | self.len = len | |
422 | self.name = name | |
423 | ||
424 | def __str__(self): | |
425 | return self.name | |
426 | ||
427 | def str_extract(self, lvalue_formatter): | |
428 | global bitop_width | |
429 | s = 's' if self.sign else '' | |
430 | lvalue = lvalue_formatter(self.name) | |
431 | return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})' | |
432 | ||
433 | def referenced_fields(self): | |
434 | return [self.name] | |
435 | ||
436 | def __eq__(self, other): | |
437 | return self.name == other.name | |
438 | ||
439 | def __ne__(self, other): | |
440 | return not self.__eq__(other) | |
441 | # end NamedField | |
94597b61 | 442 | |
568ae7ef RH |
443 | class Arguments: |
444 | """Class representing the extracted fields of a format""" | |
af93ccac | 445 | def __init__(self, nm, flds, types, extern): |
568ae7ef | 446 | self.name = nm |
abd04f92 | 447 | self.extern = extern |
af93ccac RH |
448 | self.fields = flds |
449 | self.types = types | |
568ae7ef RH |
450 | |
451 | def __str__(self): | |
452 | return self.name + ' ' + str(self.fields) | |
453 | ||
454 | def struct_name(self): | |
455 | return 'arg_' + self.name | |
456 | ||
457 | def output_def(self): | |
abd04f92 RH |
458 | if not self.extern: |
459 | output('typedef struct {\n') | |
af93ccac RH |
460 | for (n, t) in zip(self.fields, self.types): |
461 | output(f' {t} {n};\n') | |
abd04f92 | 462 | output('} ', self.struct_name(), ';\n\n') |
568ae7ef RH |
463 | # end Arguments |
464 | ||
568ae7ef RH |
465 | class General: |
466 | """Common code between instruction formats and instruction patterns""" | |
17560e93 | 467 | def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): |
568ae7ef | 468 | self.name = name |
6699ae6a | 469 | self.file = input_file |
568ae7ef RH |
470 | self.lineno = lineno |
471 | self.base = base | |
472 | self.fixedbits = fixb | |
473 | self.fixedmask = fixm | |
474 | self.undefmask = udfm | |
475 | self.fieldmask = fldm | |
476 | self.fields = flds | |
17560e93 | 477 | self.width = w |
7e6c28be | 478 | self.dangling = None |
568ae7ef RH |
479 | |
480 | def __str__(self): | |
0eff2df4 | 481 | return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) |
568ae7ef RH |
482 | |
483 | def str1(self, i): | |
484 | return str_indent(i) + self.__str__() | |
aeac22ba | 485 | |
7e6c28be PM |
486 | def dangling_references(self): |
487 | # Return a list of all named references which aren't satisfied | |
488 | # directly by this format/pattern. This will be either: | |
489 | # * a format referring to a field which is specified by the | |
490 | # pattern(s) using it | |
491 | # * a pattern referring to a field which is specified by the | |
492 | # format it uses | |
493 | # * a user error (referring to a field that doesn't exist at all) | |
494 | if self.dangling is None: | |
495 | # Compute this once and cache the answer | |
496 | dangling = [] | |
497 | for n, f in self.fields.items(): | |
498 | for r in f.referenced_fields(): | |
499 | if r not in self.fields: | |
500 | dangling.append(r) | |
501 | self.dangling = dangling | |
502 | return self.dangling | |
503 | ||
aeac22ba | 504 | def output_fields(self, indent, lvalue_formatter): |
7e6c28be PM |
505 | # We use a topological sort to ensure that any use of NamedField |
506 | # comes after the initialization of the field it is referencing. | |
507 | graph = {} | |
aeac22ba | 508 | for n, f in self.fields.items(): |
7e6c28be PM |
509 | refs = f.referenced_fields() |
510 | graph[n] = refs | |
511 | ||
512 | try: | |
513 | ts = TopologicalSorter(graph) | |
514 | for n in ts.static_order(): | |
515 | # We only want to emit assignments for the keys | |
516 | # in our fields list, not for anything that ends up | |
517 | # in the tsort graph only because it was referenced as | |
518 | # a NamedField. | |
519 | try: | |
520 | f = self.fields[n] | |
521 | output(indent, lvalue_formatter(n), ' = ', | |
522 | f.str_extract(lvalue_formatter), ';\n') | |
523 | except KeyError: | |
524 | pass | |
525 | except CycleError as e: | |
526 | # The second element of args is a list of nodes which form | |
527 | # a cycle (there might be others too, but only one is reported). | |
528 | # Pretty-print it to tell the user. | |
529 | cycle = ' => '.join(e.args[1]) | |
530 | error(self.lineno, 'field definitions form a cycle: ' + cycle) | |
568ae7ef RH |
531 | # end General |
532 | ||
533 | ||
534 | class Format(General): | |
535 | """Class representing an instruction format""" | |
536 | ||
537 | def extract_name(self): | |
71ecf79b RH |
538 | global decode_function |
539 | return decode_function + '_extract_' + self.name | |
568ae7ef RH |
540 | |
541 | def output_extract(self): | |
451e4ffd | 542 | output('static void ', self.extract_name(), '(DisasContext *ctx, ', |
568ae7ef | 543 | self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') |
aeac22ba | 544 | self.output_fields(str_indent(4), lambda n: 'a->' + n) |
568ae7ef RH |
545 | output('}\n\n') |
546 | # end Format | |
547 | ||
548 | ||
549 | class Pattern(General): | |
550 | """Class representing an instruction pattern""" | |
551 | ||
552 | def output_decl(self): | |
553 | global translate_scope | |
554 | global translate_prefix | |
555 | output('typedef ', self.base.base.struct_name(), | |
556 | ' arg_', self.name, ';\n') | |
76805598 | 557 | output(translate_scope, 'bool ', translate_prefix, '_', self.name, |
3a7be554 | 558 | '(DisasContext *ctx, arg_', self.name, ' *a);\n') |
568ae7ef RH |
559 | |
560 | def output_code(self, i, extracted, outerbits, outermask): | |
561 | global translate_prefix | |
562 | ind = str_indent(i) | |
563 | arg = self.base.base.name | |
6699ae6a | 564 | output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') |
7e6c28be PM |
565 | # We might have named references in the format that refer to fields |
566 | # in the pattern, or named references in the pattern that refer | |
567 | # to fields in the format. This affects whether we extract the fields | |
568 | # for the format before or after the ones for the pattern. | |
569 | # For simplicity we don't allow cross references in both directions. | |
570 | # This is also where we catch the syntax error of referring to | |
571 | # a nonexistent field. | |
572 | fmt_refs = self.base.dangling_references() | |
573 | for r in fmt_refs: | |
574 | if r not in self.fields: | |
575 | error(self.lineno, f'format refers to undefined field {r}') | |
576 | pat_refs = self.dangling_references() | |
577 | for r in pat_refs: | |
578 | if r not in self.base.fields: | |
579 | error(self.lineno, f'pattern refers to undefined field {r}') | |
580 | if pat_refs and fmt_refs: | |
581 | error(self.lineno, ('pattern that uses fields defined in format ' | |
582 | 'cannot use format that uses fields defined ' | |
583 | 'in pattern')) | |
584 | if fmt_refs: | |
585 | # pattern fields first | |
586 | self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | |
587 | assert not extracted, "dangling fmt refs but it was already extracted" | |
568ae7ef | 588 | if not extracted: |
451e4ffd RH |
589 | output(ind, self.base.extract_name(), |
590 | '(ctx, &u.f_', arg, ', insn);\n') | |
7e6c28be PM |
591 | if not fmt_refs: |
592 | # pattern fields last | |
593 | self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) | |
594 | ||
eb6b87fa RH |
595 | output(ind, 'if (', translate_prefix, '_', self.name, |
596 | '(ctx, &u.f_', arg, ')) return true;\n') | |
08561fc1 RH |
597 | |
598 | # Normal patterns do not have children. | |
599 | def build_tree(self): | |
600 | return | |
601 | def prop_masks(self): | |
602 | return | |
603 | def prop_format(self): | |
604 | return | |
605 | def prop_width(self): | |
606 | return | |
607 | ||
568ae7ef RH |
608 | # end Pattern |
609 | ||
610 | ||
df63044d RH |
611 | class MultiPattern(General): |
612 | """Class representing a set of instruction patterns""" | |
0eff2df4 | 613 | |
08561fc1 | 614 | def __init__(self, lineno): |
0eff2df4 RH |
615 | self.file = input_file |
616 | self.lineno = lineno | |
08561fc1 | 617 | self.pats = [] |
0eff2df4 | 618 | self.base = None |
df63044d RH |
619 | self.fixedbits = 0 |
620 | self.fixedmask = 0 | |
621 | self.undefmask = 0 | |
622 | self.width = None | |
0eff2df4 RH |
623 | |
624 | def __str__(self): | |
df63044d RH |
625 | r = 'group' |
626 | if self.fixedbits is not None: | |
627 | r += ' ' + str_match_bits(self.fixedbits, self.fixedmask) | |
628 | return r | |
0eff2df4 RH |
629 | |
630 | def output_decl(self): | |
631 | for p in self.pats: | |
632 | p.output_decl() | |
08561fc1 RH |
633 | |
634 | def prop_masks(self): | |
635 | global insnmask | |
636 | ||
637 | fixedmask = insnmask | |
638 | undefmask = insnmask | |
639 | ||
640 | # Collect fixedmask/undefmask for all of the children. | |
641 | for p in self.pats: | |
642 | p.prop_masks() | |
643 | fixedmask &= p.fixedmask | |
644 | undefmask &= p.undefmask | |
645 | ||
646 | # Widen fixedmask until all fixedbits match | |
647 | repeat = True | |
648 | fixedbits = 0 | |
649 | while repeat and fixedmask != 0: | |
650 | fixedbits = None | |
651 | for p in self.pats: | |
652 | thisbits = p.fixedbits & fixedmask | |
653 | if fixedbits is None: | |
654 | fixedbits = thisbits | |
655 | elif fixedbits != thisbits: | |
656 | fixedmask &= ~(fixedbits ^ thisbits) | |
657 | break | |
658 | else: | |
659 | repeat = False | |
660 | ||
661 | self.fixedbits = fixedbits | |
662 | self.fixedmask = fixedmask | |
663 | self.undefmask = undefmask | |
664 | ||
665 | def build_tree(self): | |
666 | for p in self.pats: | |
667 | p.build_tree() | |
668 | ||
669 | def prop_format(self): | |
670 | for p in self.pats: | |
2fd2eb5a | 671 | p.prop_format() |
08561fc1 RH |
672 | |
673 | def prop_width(self): | |
674 | width = None | |
675 | for p in self.pats: | |
676 | p.prop_width() | |
677 | if width is None: | |
678 | width = p.width | |
679 | elif width != p.width: | |
680 | error_with_file(self.file, self.lineno, | |
681 | 'width mismatch in patterns within braces') | |
682 | self.width = width | |
683 | ||
df63044d RH |
684 | # end MultiPattern |
685 | ||
686 | ||
687 | class IncMultiPattern(MultiPattern): | |
688 | """Class representing an overlapping set of instruction patterns""" | |
689 | ||
0eff2df4 RH |
690 | def output_code(self, i, extracted, outerbits, outermask): |
691 | global translate_prefix | |
692 | ind = str_indent(i) | |
693 | for p in self.pats: | |
694 | if outermask != p.fixedmask: | |
695 | innermask = p.fixedmask & ~outermask | |
696 | innerbits = p.fixedbits & ~outermask | |
c7cefe6c RH |
697 | output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n') |
698 | output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n') | |
0eff2df4 RH |
699 | p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) |
700 | output(ind, '}\n') | |
701 | else: | |
702 | p.output_code(i, extracted, p.fixedbits, p.fixedmask) | |
f2604471 RH |
703 | |
704 | def build_tree(self): | |
705 | if not self.pats: | |
706 | error_with_file(self.file, self.lineno, 'empty pattern group') | |
707 | super().build_tree() | |
708 | ||
040145c4 | 709 | #end IncMultiPattern |
0eff2df4 RH |
710 | |
711 | ||
08561fc1 RH |
712 | class Tree: |
713 | """Class representing a node in a decode tree""" | |
714 | ||
715 | def __init__(self, fm, tm): | |
716 | self.fixedmask = fm | |
717 | self.thismask = tm | |
718 | self.subs = [] | |
719 | self.base = None | |
720 | ||
721 | def str1(self, i): | |
722 | ind = str_indent(i) | |
c7cefe6c | 723 | r = ind + whex(self.fixedmask) |
08561fc1 RH |
724 | if self.format: |
725 | r += ' ' + self.format.name | |
726 | r += ' [\n' | |
727 | for (b, s) in self.subs: | |
c7cefe6c | 728 | r += ind + f' {whex(b)}:\n' |
08561fc1 RH |
729 | r += s.str1(i + 4) + '\n' |
730 | r += ind + ']' | |
731 | return r | |
732 | ||
733 | def __str__(self): | |
734 | return self.str1(0) | |
735 | ||
736 | def output_code(self, i, extracted, outerbits, outermask): | |
737 | ind = str_indent(i) | |
738 | ||
739 | # If we identified all nodes below have the same format, | |
7e6c28be PM |
740 | # extract the fields now. But don't do it if the format relies |
741 | # on named fields from the insn pattern, as those won't have | |
742 | # been initialised at this point. | |
743 | if not extracted and self.base and not self.base.dangling_references(): | |
08561fc1 RH |
744 | output(ind, self.base.extract_name(), |
745 | '(ctx, &u.f_', self.base.base.name, ', insn);\n') | |
746 | extracted = True | |
747 | ||
748 | # Attempt to aid the compiler in producing compact switch statements. | |
749 | # If the bits in the mask are contiguous, extract them. | |
750 | sh = is_contiguous(self.thismask) | |
751 | if sh > 0: | |
752 | # Propagate SH down into the local functions. | |
753 | def str_switch(b, sh=sh): | |
c7cefe6c | 754 | return f'(insn >> {sh}) & {b >> sh:#x}' |
08561fc1 RH |
755 | |
756 | def str_case(b, sh=sh): | |
c7cefe6c | 757 | return hex(b >> sh) |
08561fc1 RH |
758 | else: |
759 | def str_switch(b): | |
c7cefe6c | 760 | return f'insn & {whexC(b)}' |
08561fc1 RH |
761 | |
762 | def str_case(b): | |
c7cefe6c | 763 | return whexC(b) |
08561fc1 RH |
764 | |
765 | output(ind, 'switch (', str_switch(self.thismask), ') {\n') | |
766 | for b, s in sorted(self.subs): | |
767 | assert (self.thismask & ~s.fixedmask) == 0 | |
768 | innermask = outermask | self.thismask | |
769 | innerbits = outerbits | b | |
770 | output(ind, 'case ', str_case(b), ':\n') | |
771 | output(ind, ' /* ', | |
772 | str_match_bits(innerbits, innermask), ' */\n') | |
773 | s.output_code(i + 4, extracted, innerbits, innermask) | |
514101c0 | 774 | output(ind, ' break;\n') |
08561fc1 RH |
775 | output(ind, '}\n') |
776 | # end Tree | |
777 | ||
778 | ||
779 | class ExcMultiPattern(MultiPattern): | |
780 | """Class representing a non-overlapping set of instruction patterns""" | |
781 | ||
782 | def output_code(self, i, extracted, outerbits, outermask): | |
783 | # Defer everything to our decomposed Tree node | |
784 | self.tree.output_code(i, extracted, outerbits, outermask) | |
785 | ||
786 | @staticmethod | |
787 | def __build_tree(pats, outerbits, outermask): | |
788 | # Find the intersection of all remaining fixedmask. | |
789 | innermask = ~outermask & insnmask | |
790 | for i in pats: | |
791 | innermask &= i.fixedmask | |
792 | ||
793 | if innermask == 0: | |
794 | # Edge condition: One pattern covers the entire insnmask | |
795 | if len(pats) == 1: | |
796 | t = Tree(outermask, innermask) | |
797 | t.subs.append((0, pats[0])) | |
798 | return t | |
799 | ||
800 | text = 'overlapping patterns:' | |
801 | for p in pats: | |
802 | text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p) | |
803 | error_with_file(pats[0].file, pats[0].lineno, text) | |
804 | ||
805 | fullmask = outermask | innermask | |
806 | ||
807 | # Sort each element of pats into the bin selected by the mask. | |
808 | bins = {} | |
809 | for i in pats: | |
810 | fb = i.fixedbits & innermask | |
811 | if fb in bins: | |
812 | bins[fb].append(i) | |
813 | else: | |
814 | bins[fb] = [i] | |
815 | ||
816 | # We must recurse if any bin has more than one element or if | |
817 | # the single element in the bin has not been fully matched. | |
818 | t = Tree(fullmask, innermask) | |
819 | ||
820 | for b, l in bins.items(): | |
821 | s = l[0] | |
822 | if len(l) > 1 or s.fixedmask & ~fullmask != 0: | |
823 | s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask) | |
824 | t.subs.append((b, s)) | |
825 | ||
826 | return t | |
827 | ||
828 | def build_tree(self): | |
2fd2eb5a | 829 | super().build_tree() |
08561fc1 RH |
830 | self.tree = self.__build_tree(self.pats, self.fixedbits, |
831 | self.fixedmask) | |
832 | ||
833 | @staticmethod | |
834 | def __prop_format(tree): | |
835 | """Propagate Format objects into the decode tree""" | |
836 | ||
837 | # Depth first search. | |
838 | for (b, s) in tree.subs: | |
839 | if isinstance(s, Tree): | |
840 | ExcMultiPattern.__prop_format(s) | |
841 | ||
842 | # If all entries in SUBS have the same format, then | |
843 | # propagate that into the tree. | |
844 | f = None | |
845 | for (b, s) in tree.subs: | |
846 | if f is None: | |
847 | f = s.base | |
848 | if f is None: | |
849 | return | |
850 | if f is not s.base: | |
851 | return | |
852 | tree.base = f | |
853 | ||
854 | def prop_format(self): | |
855 | super().prop_format() | |
856 | self.__prop_format(self.tree) | |
857 | ||
858 | # end ExcMultiPattern | |
859 | ||
860 | ||
568ae7ef RH |
861 | def parse_field(lineno, name, toks): |
862 | """Parse one instruction field from TOKS at LINENO""" | |
863 | global fields | |
568ae7ef | 864 | global insnwidth |
7e6c28be | 865 | global re_C_ident |
568ae7ef RH |
866 | |
867 | # A "simple" field will have only one entry; | |
868 | # a "multifield" will have several. | |
869 | subs = [] | |
870 | width = 0 | |
871 | func = None | |
872 | for t in toks: | |
acfdd239 | 873 | if re.match('^!function=', t): |
568ae7ef RH |
874 | if func: |
875 | error(lineno, 'duplicate function') | |
876 | func = t.split('=') | |
877 | func = func[1] | |
878 | continue | |
879 | ||
7e6c28be PM |
880 | if re.fullmatch(re_C_ident + ':s[0-9]+', t): |
881 | # Signed named field | |
882 | subtoks = t.split(':') | |
883 | n = subtoks[0] | |
884 | le = int(subtoks[1]) | |
885 | f = NamedField(n, True, le) | |
886 | subs.append(f) | |
887 | width += le | |
888 | continue | |
889 | if re.fullmatch(re_C_ident + ':[0-9]+', t): | |
890 | # Unsigned named field | |
891 | subtoks = t.split(':') | |
892 | n = subtoks[0] | |
893 | le = int(subtoks[1]) | |
894 | f = NamedField(n, False, le) | |
895 | subs.append(f) | |
896 | width += le | |
897 | continue | |
898 | ||
2d110c11 | 899 | if re.fullmatch('[0-9]+:s[0-9]+', t): |
568ae7ef RH |
900 | # Signed field extract |
901 | subtoks = t.split(':s') | |
902 | sign = True | |
2d110c11 | 903 | elif re.fullmatch('[0-9]+:[0-9]+', t): |
568ae7ef RH |
904 | # Unsigned field extract |
905 | subtoks = t.split(':') | |
906 | sign = False | |
907 | else: | |
9f6e2b4d | 908 | error(lineno, f'invalid field token "{t}"') |
568ae7ef RH |
909 | po = int(subtoks[0]) |
910 | le = int(subtoks[1]) | |
911 | if po + le > insnwidth: | |
9f6e2b4d | 912 | error(lineno, f'field {t} too large') |
568ae7ef RH |
913 | f = Field(sign, po, le) |
914 | subs.append(f) | |
915 | width += le | |
916 | ||
917 | if width > insnwidth: | |
918 | error(lineno, 'field too large') | |
94597b61 RH |
919 | if len(subs) == 0: |
920 | if func: | |
921 | f = ParameterField(func) | |
922 | else: | |
923 | error(lineno, 'field with no value') | |
568ae7ef | 924 | else: |
94597b61 RH |
925 | if len(subs) == 1: |
926 | f = subs[0] | |
927 | else: | |
928 | mask = 0 | |
929 | for s in subs: | |
930 | if mask & s.mask: | |
931 | error(lineno, 'field components overlap') | |
932 | mask |= s.mask | |
933 | f = MultiField(subs, mask) | |
934 | if func: | |
935 | f = FunctionField(func, f) | |
568ae7ef RH |
936 | |
937 | if name in fields: | |
938 | error(lineno, 'duplicate field', name) | |
939 | fields[name] = f | |
940 | # end parse_field | |
941 | ||
942 | ||
943 | def parse_arguments(lineno, name, toks): | |
944 | """Parse one argument set from TOKS at LINENO""" | |
945 | global arguments | |
acfdd239 | 946 | global re_C_ident |
c6920795 | 947 | global anyextern |
568ae7ef RH |
948 | |
949 | flds = [] | |
af93ccac | 950 | types = [] |
abd04f92 | 951 | extern = False |
af93ccac RH |
952 | for n in toks: |
953 | if re.fullmatch('!extern', n): | |
abd04f92 | 954 | extern = True |
c6920795 | 955 | anyextern = True |
abd04f92 | 956 | continue |
af93ccac RH |
957 | if re.fullmatch(re_C_ident + ':' + re_C_ident, n): |
958 | (n, t) = n.split(':') | |
959 | elif re.fullmatch(re_C_ident, n): | |
960 | t = 'int' | |
961 | else: | |
962 | error(lineno, f'invalid argument set token "{n}"') | |
963 | if n in flds: | |
964 | error(lineno, f'duplicate argument "{n}"') | |
965 | flds.append(n) | |
966 | types.append(t) | |
568ae7ef RH |
967 | |
968 | if name in arguments: | |
969 | error(lineno, 'duplicate argument set', name) | |
af93ccac | 970 | arguments[name] = Arguments(name, flds, types, extern) |
568ae7ef RH |
971 | # end parse_arguments |
972 | ||
973 | ||
974 | def lookup_field(lineno, name): | |
975 | global fields | |
976 | if name in fields: | |
977 | return fields[name] | |
978 | error(lineno, 'undefined field', name) | |
979 | ||
980 | ||
981 | def add_field(lineno, flds, new_name, f): | |
982 | if new_name in flds: | |
983 | error(lineno, 'duplicate field', new_name) | |
984 | flds[new_name] = f | |
985 | return flds | |
986 | ||
987 | ||
988 | def add_field_byname(lineno, flds, new_name, old_name): | |
989 | return add_field(lineno, flds, new_name, lookup_field(lineno, old_name)) | |
990 | ||
991 | ||
992 | def infer_argument_set(flds): | |
993 | global arguments | |
abd04f92 | 994 | global decode_function |
568ae7ef RH |
995 | |
996 | for arg in arguments.values(): | |
af93ccac | 997 | if eq_fields_for_args(flds, arg): |
568ae7ef RH |
998 | return arg |
999 | ||
abd04f92 | 1000 | name = decode_function + str(len(arguments)) |
af93ccac | 1001 | arg = Arguments(name, flds.keys(), ['int'] * len(flds), False) |
568ae7ef RH |
1002 | arguments[name] = arg |
1003 | return arg | |
1004 | ||
1005 | ||
17560e93 | 1006 | def infer_format(arg, fieldmask, flds, width): |
568ae7ef RH |
1007 | global arguments |
1008 | global formats | |
abd04f92 | 1009 | global decode_function |
568ae7ef RH |
1010 | |
1011 | const_flds = {} | |
1012 | var_flds = {} | |
1013 | for n, c in flds.items(): | |
1014 | if c is ConstField: | |
1015 | const_flds[n] = c | |
1016 | else: | |
1017 | var_flds[n] = c | |
1018 | ||
1019 | # Look for an existing format with the same argument set and fields | |
1020 | for fmt in formats.values(): | |
1021 | if arg and fmt.base != arg: | |
1022 | continue | |
1023 | if fieldmask != fmt.fieldmask: | |
1024 | continue | |
17560e93 RH |
1025 | if width != fmt.width: |
1026 | continue | |
568ae7ef RH |
1027 | if not eq_fields_for_fmts(flds, fmt.fields): |
1028 | continue | |
1029 | return (fmt, const_flds) | |
1030 | ||
abd04f92 | 1031 | name = decode_function + '_Fmt_' + str(len(formats)) |
568ae7ef RH |
1032 | if not arg: |
1033 | arg = infer_argument_set(flds) | |
1034 | ||
17560e93 | 1035 | fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width) |
568ae7ef RH |
1036 | formats[name] = fmt |
1037 | ||
1038 | return (fmt, const_flds) | |
1039 | # end infer_format | |
1040 | ||
1041 | ||
08561fc1 | 1042 | def parse_generic(lineno, parent_pat, name, toks): |
568ae7ef RH |
1043 | """Parse one instruction format from TOKS at LINENO""" |
1044 | global fields | |
1045 | global arguments | |
1046 | global formats | |
0eff2df4 | 1047 | global allpatterns |
acfdd239 RH |
1048 | global re_arg_ident |
1049 | global re_fld_ident | |
1050 | global re_fmt_ident | |
1051 | global re_C_ident | |
568ae7ef RH |
1052 | global insnwidth |
1053 | global insnmask | |
17560e93 | 1054 | global variablewidth |
568ae7ef | 1055 | |
08561fc1 RH |
1056 | is_format = parent_pat is None |
1057 | ||
568ae7ef RH |
1058 | fixedmask = 0 |
1059 | fixedbits = 0 | |
1060 | undefmask = 0 | |
1061 | width = 0 | |
1062 | flds = {} | |
1063 | arg = None | |
1064 | fmt = None | |
1065 | for t in toks: | |
65fdb3cc | 1066 | # '&Foo' gives a format an explicit argument set. |
acfdd239 | 1067 | if re.fullmatch(re_arg_ident, t): |
568ae7ef RH |
1068 | tt = t[1:] |
1069 | if arg: | |
1070 | error(lineno, 'multiple argument sets') | |
1071 | if tt in arguments: | |
1072 | arg = arguments[tt] | |
1073 | else: | |
1074 | error(lineno, 'undefined argument set', t) | |
1075 | continue | |
1076 | ||
1077 | # '@Foo' gives a pattern an explicit format. | |
acfdd239 | 1078 | if re.fullmatch(re_fmt_ident, t): |
568ae7ef RH |
1079 | tt = t[1:] |
1080 | if fmt: | |
1081 | error(lineno, 'multiple formats') | |
1082 | if tt in formats: | |
1083 | fmt = formats[tt] | |
1084 | else: | |
1085 | error(lineno, 'undefined format', t) | |
1086 | continue | |
1087 | ||
1088 | # '%Foo' imports a field. | |
acfdd239 | 1089 | if re.fullmatch(re_fld_ident, t): |
568ae7ef RH |
1090 | tt = t[1:] |
1091 | flds = add_field_byname(lineno, flds, tt, tt) | |
1092 | continue | |
1093 | ||
1094 | # 'Foo=%Bar' imports a field with a different name. | |
acfdd239 | 1095 | if re.fullmatch(re_C_ident + '=' + re_fld_ident, t): |
568ae7ef RH |
1096 | (fname, iname) = t.split('=%') |
1097 | flds = add_field_byname(lineno, flds, fname, iname) | |
1098 | continue | |
1099 | ||
1100 | # 'Foo=number' sets an argument field to a constant value | |
acfdd239 | 1101 | if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t): |
568ae7ef RH |
1102 | (fname, value) = t.split('=') |
1103 | value = int(value) | |
1104 | flds = add_field(lineno, flds, fname, ConstField(value)) | |
1105 | continue | |
1106 | ||
1107 | # Pattern of 0s, 1s, dots and dashes indicate required zeros, | |
1108 | # required ones, or dont-cares. | |
2d110c11 | 1109 | if re.fullmatch('[01.-]+', t): |
568ae7ef RH |
1110 | shift = len(t) |
1111 | fms = t.replace('0', '1') | |
1112 | fms = fms.replace('.', '0') | |
1113 | fms = fms.replace('-', '0') | |
1114 | fbs = t.replace('.', '0') | |
1115 | fbs = fbs.replace('-', '0') | |
1116 | ubm = t.replace('1', '0') | |
1117 | ubm = ubm.replace('.', '0') | |
1118 | ubm = ubm.replace('-', '1') | |
1119 | fms = int(fms, 2) | |
1120 | fbs = int(fbs, 2) | |
1121 | ubm = int(ubm, 2) | |
1122 | fixedbits = (fixedbits << shift) | fbs | |
1123 | fixedmask = (fixedmask << shift) | fms | |
1124 | undefmask = (undefmask << shift) | ubm | |
1125 | # Otherwise, fieldname:fieldwidth | |
acfdd239 | 1126 | elif re.fullmatch(re_C_ident + ':s?[0-9]+', t): |
568ae7ef RH |
1127 | (fname, flen) = t.split(':') |
1128 | sign = False | |
1129 | if flen[0] == 's': | |
1130 | sign = True | |
1131 | flen = flen[1:] | |
1132 | shift = int(flen, 10) | |
2decfc95 | 1133 | if shift + width > insnwidth: |
9f6e2b4d | 1134 | error(lineno, f'field {fname} exceeds insnwidth') |
568ae7ef RH |
1135 | f = Field(sign, insnwidth - width - shift, shift) |
1136 | flds = add_field(lineno, flds, fname, f) | |
1137 | fixedbits <<= shift | |
1138 | fixedmask <<= shift | |
1139 | undefmask <<= shift | |
1140 | else: | |
9f6e2b4d | 1141 | error(lineno, f'invalid token "{t}"') |
568ae7ef RH |
1142 | width += shift |
1143 | ||
17560e93 RH |
1144 | if variablewidth and width < insnwidth and width % 8 == 0: |
1145 | shift = insnwidth - width | |
1146 | fixedbits <<= shift | |
1147 | fixedmask <<= shift | |
1148 | undefmask <<= shift | |
1149 | undefmask |= (1 << shift) - 1 | |
1150 | ||
568ae7ef | 1151 | # We should have filled in all of the bits of the instruction. |
17560e93 | 1152 | elif not (is_format and width == 0) and width != insnwidth: |
9f6e2b4d | 1153 | error(lineno, f'definition has {width} bits') |
568ae7ef | 1154 | |
65fdb3cc | 1155 | # Do not check for fields overlapping fields; one valid usage |
568ae7ef RH |
1156 | # is to be able to duplicate fields via import. |
1157 | fieldmask = 0 | |
1158 | for f in flds.values(): | |
1159 | fieldmask |= f.mask | |
1160 | ||
1161 | # Fix up what we've parsed to match either a format or a pattern. | |
1162 | if is_format: | |
1163 | # Formats cannot reference formats. | |
1164 | if fmt: | |
1165 | error(lineno, 'format referencing format') | |
1166 | # If an argument set is given, then there should be no fields | |
1167 | # without a place to store it. | |
1168 | if arg: | |
1169 | for f in flds.keys(): | |
1170 | if f not in arg.fields: | |
9f6e2b4d | 1171 | error(lineno, f'field {f} not in argument set {arg.name}') |
568ae7ef RH |
1172 | else: |
1173 | arg = infer_argument_set(flds) | |
1174 | if name in formats: | |
1175 | error(lineno, 'duplicate format name', name) | |
1176 | fmt = Format(name, lineno, arg, fixedbits, fixedmask, | |
17560e93 | 1177 | undefmask, fieldmask, flds, width) |
568ae7ef RH |
1178 | formats[name] = fmt |
1179 | else: | |
1180 | # Patterns can reference a format ... | |
1181 | if fmt: | |
1182 | # ... but not an argument simultaneously | |
1183 | if arg: | |
1184 | error(lineno, 'pattern specifies both format and argument set') | |
1185 | if fixedmask & fmt.fixedmask: | |
1186 | error(lineno, 'pattern fixed bits overlap format fixed bits') | |
17560e93 RH |
1187 | if width != fmt.width: |
1188 | error(lineno, 'pattern uses format of different width') | |
568ae7ef RH |
1189 | fieldmask |= fmt.fieldmask |
1190 | fixedbits |= fmt.fixedbits | |
1191 | fixedmask |= fmt.fixedmask | |
1192 | undefmask |= fmt.undefmask | |
1193 | else: | |
17560e93 | 1194 | (fmt, flds) = infer_format(arg, fieldmask, flds, width) |
568ae7ef RH |
1195 | arg = fmt.base |
1196 | for f in flds.keys(): | |
1197 | if f not in arg.fields: | |
9f6e2b4d | 1198 | error(lineno, f'field {f} not in argument set {arg.name}') |
568ae7ef | 1199 | if f in fmt.fields.keys(): |
9f6e2b4d | 1200 | error(lineno, f'field {f} set by format and pattern') |
568ae7ef RH |
1201 | for f in arg.fields: |
1202 | if f not in flds.keys() and f not in fmt.fields.keys(): | |
9f6e2b4d | 1203 | error(lineno, f'field {f} not initialized') |
568ae7ef | 1204 | pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, |
17560e93 | 1205 | undefmask, fieldmask, flds, width) |
08561fc1 | 1206 | parent_pat.pats.append(pat) |
0eff2df4 | 1207 | allpatterns.append(pat) |
568ae7ef RH |
1208 | |
1209 | # Validate the masks that we have assembled. | |
1210 | if fieldmask & fixedmask: | |
c7cefe6c RH |
1211 | error(lineno, 'fieldmask overlaps fixedmask ', |
1212 | f'({whex(fieldmask)} & {whex(fixedmask)})') | |
568ae7ef | 1213 | if fieldmask & undefmask: |
c7cefe6c RH |
1214 | error(lineno, 'fieldmask overlaps undefmask ', |
1215 | f'({whex(fieldmask)} & {whex(undefmask)})') | |
568ae7ef | 1216 | if fixedmask & undefmask: |
c7cefe6c RH |
1217 | error(lineno, 'fixedmask overlaps undefmask ', |
1218 | f'({whex(fixedmask)} & {whex(undefmask)})') | |
568ae7ef RH |
1219 | if not is_format: |
1220 | allbits = fieldmask | fixedmask | undefmask | |
1221 | if allbits != insnmask: | |
c7cefe6c RH |
1222 | error(lineno, 'bits left unspecified ', |
1223 | f'({whex(allbits ^ insnmask)})') | |
568ae7ef RH |
1224 | # end parse_general |
1225 | ||
0eff2df4 | 1226 | |
08561fc1 | 1227 | def parse_file(f, parent_pat): |
568ae7ef | 1228 | """Parse all of the patterns within a file""" |
acfdd239 RH |
1229 | global re_arg_ident |
1230 | global re_fld_ident | |
1231 | global re_fmt_ident | |
1232 | global re_pat_ident | |
568ae7ef RH |
1233 | |
1234 | # Read all of the lines of the file. Concatenate lines | |
1235 | # ending in backslash; discard empty lines and comments. | |
1236 | toks = [] | |
1237 | lineno = 0 | |
0eff2df4 | 1238 | nesting = 0 |
08561fc1 | 1239 | nesting_pats = [] |
0eff2df4 | 1240 | |
568ae7ef RH |
1241 | for line in f: |
1242 | lineno += 1 | |
1243 | ||
0eff2df4 RH |
1244 | # Expand and strip spaces, to find indent. |
1245 | line = line.rstrip() | |
1246 | line = line.expandtabs() | |
1247 | len1 = len(line) | |
1248 | line = line.lstrip() | |
1249 | len2 = len(line) | |
1250 | ||
568ae7ef RH |
1251 | # Discard comments |
1252 | end = line.find('#') | |
1253 | if end >= 0: | |
1254 | line = line[:end] | |
1255 | ||
1256 | t = line.split() | |
1257 | if len(toks) != 0: | |
1258 | # Next line after continuation | |
1259 | toks.extend(t) | |
568ae7ef | 1260 | else: |
0eff2df4 RH |
1261 | # Allow completely blank lines. |
1262 | if len1 == 0: | |
1263 | continue | |
1264 | indent = len1 - len2 | |
1265 | # Empty line due to comment. | |
1266 | if len(t) == 0: | |
1267 | # Indentation must be correct, even for comment lines. | |
1268 | if indent != nesting: | |
1269 | error(lineno, 'indentation ', indent, ' != ', nesting) | |
1270 | continue | |
1271 | start_lineno = lineno | |
568ae7ef RH |
1272 | toks = t |
1273 | ||
1274 | # Continuation? | |
1275 | if toks[-1] == '\\': | |
1276 | toks.pop() | |
1277 | continue | |
1278 | ||
568ae7ef RH |
1279 | name = toks[0] |
1280 | del toks[0] | |
1281 | ||
0eff2df4 | 1282 | # End nesting? |
067e8b0f | 1283 | if name == '}' or name == ']': |
0eff2df4 RH |
1284 | if len(toks) != 0: |
1285 | error(start_lineno, 'extra tokens after close brace') | |
08561fc1 | 1286 | |
067e8b0f RH |
1287 | # Make sure { } and [ ] nest properly. |
1288 | if (name == '}') != isinstance(parent_pat, IncMultiPattern): | |
1289 | error(lineno, 'mismatched close brace') | |
1290 | ||
08561fc1 RH |
1291 | try: |
1292 | parent_pat = nesting_pats.pop() | |
1293 | except: | |
067e8b0f | 1294 | error(lineno, 'extra close brace') |
08561fc1 | 1295 | |
0eff2df4 RH |
1296 | nesting -= 2 |
1297 | if indent != nesting: | |
08561fc1 RH |
1298 | error(lineno, 'indentation ', indent, ' != ', nesting) |
1299 | ||
0eff2df4 RH |
1300 | toks = [] |
1301 | continue | |
1302 | ||
1303 | # Everything else should have current indentation. | |
1304 | if indent != nesting: | |
1305 | error(start_lineno, 'indentation ', indent, ' != ', nesting) | |
1306 | ||
1307 | # Start nesting? | |
067e8b0f | 1308 | if name == '{' or name == '[': |
0eff2df4 RH |
1309 | if len(toks) != 0: |
1310 | error(start_lineno, 'extra tokens after open brace') | |
08561fc1 | 1311 | |
067e8b0f RH |
1312 | if name == '{': |
1313 | nested_pat = IncMultiPattern(start_lineno) | |
1314 | else: | |
1315 | nested_pat = ExcMultiPattern(start_lineno) | |
08561fc1 RH |
1316 | parent_pat.pats.append(nested_pat) |
1317 | nesting_pats.append(parent_pat) | |
1318 | parent_pat = nested_pat | |
1319 | ||
0eff2df4 RH |
1320 | nesting += 2 |
1321 | toks = [] | |
1322 | continue | |
1323 | ||
568ae7ef | 1324 | # Determine the type of object needing to be parsed. |
acfdd239 | 1325 | if re.fullmatch(re_fld_ident, name): |
0eff2df4 | 1326 | parse_field(start_lineno, name[1:], toks) |
acfdd239 | 1327 | elif re.fullmatch(re_arg_ident, name): |
0eff2df4 | 1328 | parse_arguments(start_lineno, name[1:], toks) |
acfdd239 | 1329 | elif re.fullmatch(re_fmt_ident, name): |
08561fc1 | 1330 | parse_generic(start_lineno, None, name[1:], toks) |
acfdd239 | 1331 | elif re.fullmatch(re_pat_ident, name): |
08561fc1 | 1332 | parse_generic(start_lineno, parent_pat, name, toks) |
acfdd239 | 1333 | else: |
9f6e2b4d | 1334 | error(lineno, f'invalid token "{name}"') |
568ae7ef | 1335 | toks = [] |
067e8b0f RH |
1336 | |
1337 | if nesting != 0: | |
1338 | error(lineno, 'missing close brace') | |
568ae7ef RH |
1339 | # end parse_file |
1340 | ||
1341 | ||
70e0711a RH |
1342 | class SizeTree: |
1343 | """Class representing a node in a size decode tree""" | |
1344 | ||
1345 | def __init__(self, m, w): | |
1346 | self.mask = m | |
1347 | self.subs = [] | |
1348 | self.base = None | |
1349 | self.width = w | |
1350 | ||
1351 | def str1(self, i): | |
1352 | ind = str_indent(i) | |
c7cefe6c | 1353 | r = ind + whex(self.mask) + ' [\n' |
70e0711a | 1354 | for (b, s) in self.subs: |
c7cefe6c | 1355 | r += ind + f' {whex(b)}:\n' |
70e0711a RH |
1356 | r += s.str1(i + 4) + '\n' |
1357 | r += ind + ']' | |
1358 | return r | |
1359 | ||
1360 | def __str__(self): | |
1361 | return self.str1(0) | |
1362 | ||
1363 | def output_code(self, i, extracted, outerbits, outermask): | |
1364 | ind = str_indent(i) | |
1365 | ||
1366 | # If we need to load more bytes to test, do so now. | |
1367 | if extracted < self.width: | |
9f6e2b4d RH |
1368 | output(ind, f'insn = {decode_function}_load_bytes', |
1369 | f'(ctx, insn, {extracted // 8}, {self.width // 8});\n') | |
70e0711a RH |
1370 | extracted = self.width |
1371 | ||
1372 | # Attempt to aid the compiler in producing compact switch statements. | |
1373 | # If the bits in the mask are contiguous, extract them. | |
1374 | sh = is_contiguous(self.mask) | |
1375 | if sh > 0: | |
1376 | # Propagate SH down into the local functions. | |
1377 | def str_switch(b, sh=sh): | |
c7cefe6c | 1378 | return f'(insn >> {sh}) & {b >> sh:#x}' |
70e0711a RH |
1379 | |
1380 | def str_case(b, sh=sh): | |
c7cefe6c | 1381 | return hex(b >> sh) |
70e0711a RH |
1382 | else: |
1383 | def str_switch(b): | |
c7cefe6c | 1384 | return f'insn & {whexC(b)}' |
70e0711a RH |
1385 | |
1386 | def str_case(b): | |
c7cefe6c | 1387 | return whexC(b) |
70e0711a RH |
1388 | |
1389 | output(ind, 'switch (', str_switch(self.mask), ') {\n') | |
1390 | for b, s in sorted(self.subs): | |
1391 | innermask = outermask | self.mask | |
1392 | innerbits = outerbits | b | |
1393 | output(ind, 'case ', str_case(b), ':\n') | |
1394 | output(ind, ' /* ', | |
1395 | str_match_bits(innerbits, innermask), ' */\n') | |
1396 | s.output_code(i + 4, extracted, innerbits, innermask) | |
1397 | output(ind, '}\n') | |
1398 | output(ind, 'return insn;\n') | |
1399 | # end SizeTree | |
1400 | ||
1401 | class SizeLeaf: | |
1402 | """Class representing a leaf node in a size decode tree""" | |
1403 | ||
1404 | def __init__(self, m, w): | |
1405 | self.mask = m | |
1406 | self.width = w | |
1407 | ||
1408 | def str1(self, i): | |
c7cefe6c | 1409 | return str_indent(i) + whex(self.mask) |
70e0711a RH |
1410 | |
1411 | def __str__(self): | |
1412 | return self.str1(0) | |
1413 | ||
1414 | def output_code(self, i, extracted, outerbits, outermask): | |
1415 | global decode_function | |
1416 | ind = str_indent(i) | |
1417 | ||
1418 | # If we need to load more bytes, do so now. | |
1419 | if extracted < self.width: | |
9f6e2b4d RH |
1420 | output(ind, f'insn = {decode_function}_load_bytes', |
1421 | f'(ctx, insn, {extracted // 8}, {self.width // 8});\n') | |
70e0711a RH |
1422 | extracted = self.width |
1423 | output(ind, 'return insn;\n') | |
1424 | # end SizeLeaf | |
1425 | ||
1426 | ||
1427 | def build_size_tree(pats, width, outerbits, outermask): | |
1428 | global insnwidth | |
1429 | ||
1430 | # Collect the mask of bits that are fixed in this width | |
1431 | innermask = 0xff << (insnwidth - width) | |
1432 | innermask &= ~outermask | |
1433 | minwidth = None | |
1434 | onewidth = True | |
1435 | for i in pats: | |
1436 | innermask &= i.fixedmask | |
1437 | if minwidth is None: | |
1438 | minwidth = i.width | |
1439 | elif minwidth != i.width: | |
1440 | onewidth = False; | |
1441 | if minwidth < i.width: | |
1442 | minwidth = i.width | |
1443 | ||
1444 | if onewidth: | |
1445 | return SizeLeaf(innermask, minwidth) | |
1446 | ||
1447 | if innermask == 0: | |
1448 | if width < minwidth: | |
1449 | return build_size_tree(pats, width + 8, outerbits, outermask) | |
1450 | ||
1451 | pnames = [] | |
1452 | for p in pats: | |
1453 | pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) | |
1454 | error_with_file(pats[0].file, pats[0].lineno, | |
9f6e2b4d | 1455 | f'overlapping patterns size {width}:', pnames) |
70e0711a RH |
1456 | |
1457 | bins = {} | |
1458 | for i in pats: | |
1459 | fb = i.fixedbits & innermask | |
1460 | if fb in bins: | |
1461 | bins[fb].append(i) | |
1462 | else: | |
1463 | bins[fb] = [i] | |
1464 | ||
1465 | fullmask = outermask | innermask | |
1466 | lens = sorted(bins.keys()) | |
1467 | if len(lens) == 1: | |
1468 | b = lens[0] | |
1469 | return build_size_tree(bins[b], width + 8, b | outerbits, fullmask) | |
1470 | ||
1471 | r = SizeTree(innermask, width) | |
1472 | for b, l in bins.items(): | |
1473 | s = build_size_tree(l, width, b | outerbits, fullmask) | |
1474 | r.subs.append((b, s)) | |
1475 | return r | |
1476 | # end build_size_tree | |
1477 | ||
1478 | ||
70e0711a RH |
1479 | def prop_size(tree): |
1480 | """Propagate minimum widths up the decode size tree""" | |
1481 | ||
1482 | if isinstance(tree, SizeTree): | |
1483 | min = None | |
1484 | for (b, s) in tree.subs: | |
1485 | width = prop_size(s) | |
1486 | if min is None or min > width: | |
1487 | min = width | |
1488 | assert min >= tree.width | |
1489 | tree.width = min | |
1490 | else: | |
1491 | min = tree.width | |
1492 | return min | |
1493 | # end prop_size | |
1494 | ||
1495 | ||
568ae7ef RH |
1496 | def main(): |
1497 | global arguments | |
1498 | global formats | |
0eff2df4 | 1499 | global allpatterns |
568ae7ef RH |
1500 | global translate_scope |
1501 | global translate_prefix | |
1502 | global output_fd | |
1503 | global output_file | |
c6a5fc2a | 1504 | global output_null |
568ae7ef RH |
1505 | global input_file |
1506 | global insnwidth | |
1507 | global insntype | |
83d7c40c | 1508 | global insnmask |
abd04f92 | 1509 | global decode_function |
60c425f3 | 1510 | global bitop_width |
17560e93 | 1511 | global variablewidth |
c6920795 | 1512 | global anyextern |
9b5acc56 | 1513 | global testforerror |
568ae7ef | 1514 | |
568ae7ef RH |
1515 | decode_scope = 'static ' |
1516 | ||
cd3e7fc1 | 1517 | long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', |
c6a5fc2a RH |
1518 | 'static-decode=', 'varinsnwidth=', 'test-for-error', |
1519 | 'output-null'] | |
568ae7ef | 1520 | try: |
abff1abf | 1521 | (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts) |
568ae7ef RH |
1522 | except getopt.GetoptError as err: |
1523 | error(0, err) | |
1524 | for o, a in opts: | |
1525 | if o in ('-o', '--output'): | |
1526 | output_file = a | |
1527 | elif o == '--decode': | |
1528 | decode_function = a | |
1529 | decode_scope = '' | |
cd3e7fc1 RH |
1530 | elif o == '--static-decode': |
1531 | decode_function = a | |
568ae7ef RH |
1532 | elif o == '--translate': |
1533 | translate_prefix = a | |
1534 | translate_scope = '' | |
17560e93 RH |
1535 | elif o in ('-w', '--insnwidth', '--varinsnwidth'): |
1536 | if o == '--varinsnwidth': | |
1537 | variablewidth = True | |
568ae7ef RH |
1538 | insnwidth = int(a) |
1539 | if insnwidth == 16: | |
1540 | insntype = 'uint16_t' | |
1541 | insnmask = 0xffff | |
60c425f3 LFFP |
1542 | elif insnwidth == 64: |
1543 | insntype = 'uint64_t' | |
1544 | insnmask = 0xffffffffffffffff | |
1545 | bitop_width = 64 | |
568ae7ef RH |
1546 | elif insnwidth != 32: |
1547 | error(0, 'cannot handle insns of width', insnwidth) | |
9b5acc56 RH |
1548 | elif o == '--test-for-error': |
1549 | testforerror = True | |
c6a5fc2a RH |
1550 | elif o == '--output-null': |
1551 | output_null = True | |
568ae7ef RH |
1552 | else: |
1553 | assert False, 'unhandled option' | |
1554 | ||
1555 | if len(args) < 1: | |
1556 | error(0, 'missing input file') | |
08561fc1 RH |
1557 | |
1558 | toppat = ExcMultiPattern(0) | |
1559 | ||
6699ae6a RH |
1560 | for filename in args: |
1561 | input_file = filename | |
4cacecaa | 1562 | f = open(filename, 'rt', encoding='utf-8') |
08561fc1 | 1563 | parse_file(f, toppat) |
6699ae6a | 1564 | f.close() |
568ae7ef | 1565 | |
08561fc1 RH |
1566 | # We do not want to compute masks for toppat, because those masks |
1567 | # are used as a starting point for build_tree. For toppat, we must | |
1568 | # insist that decode begins from naught. | |
1569 | for i in toppat.pats: | |
1570 | i.prop_masks() | |
1571 | ||
1572 | toppat.build_tree() | |
1573 | toppat.prop_format() | |
1574 | ||
70e0711a | 1575 | if variablewidth: |
08561fc1 RH |
1576 | for i in toppat.pats: |
1577 | i.prop_width() | |
1578 | stree = build_size_tree(toppat.pats, 8, 0, 0) | |
70e0711a RH |
1579 | prop_size(stree) |
1580 | ||
c6a5fc2a RH |
1581 | if output_null: |
1582 | output_fd = open(os.devnull, 'wt', encoding='utf-8', errors="ignore") | |
1583 | elif output_file: | |
4cacecaa | 1584 | output_fd = open(output_file, 'wt', encoding='utf-8') |
568ae7ef | 1585 | else: |
4cacecaa PMD |
1586 | output_fd = io.TextIOWrapper(sys.stdout.buffer, |
1587 | encoding=sys.stdout.encoding, | |
1588 | errors="ignore") | |
568ae7ef RH |
1589 | |
1590 | output_autogen() | |
1591 | for n in sorted(arguments.keys()): | |
1592 | f = arguments[n] | |
1593 | f.output_def() | |
1594 | ||
1595 | # A single translate function can be invoked for different patterns. | |
1596 | # Make sure that the argument sets are the same, and declare the | |
1597 | # function only once. | |
c6920795 RH |
1598 | # |
1599 | # If we're sharing formats, we're likely also sharing trans_* functions, | |
1600 | # but we can't tell which ones. Prevent issues from the compiler by | |
1601 | # suppressing redundant declaration warnings. | |
1602 | if anyextern: | |
7aa12aa2 TH |
1603 | output("#pragma GCC diagnostic push\n", |
1604 | "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n", | |
1605 | "#ifdef __clang__\n" | |
c6920795 | 1606 | "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n", |
c6920795 RH |
1607 | "#endif\n\n") |
1608 | ||
568ae7ef | 1609 | out_pats = {} |
0eff2df4 | 1610 | for i in allpatterns: |
568ae7ef RH |
1611 | if i.name in out_pats: |
1612 | p = out_pats[i.name] | |
1613 | if i.base.base != p.base.base: | |
1614 | error(0, i.name, ' has conflicting argument sets') | |
1615 | else: | |
1616 | i.output_decl() | |
1617 | out_pats[i.name] = i | |
1618 | output('\n') | |
1619 | ||
c6920795 | 1620 | if anyextern: |
7aa12aa2 | 1621 | output("#pragma GCC diagnostic pop\n\n") |
c6920795 | 1622 | |
568ae7ef RH |
1623 | for n in sorted(formats.keys()): |
1624 | f = formats[n] | |
1625 | f.output_extract() | |
1626 | ||
1627 | output(decode_scope, 'bool ', decode_function, | |
1628 | '(DisasContext *ctx, ', insntype, ' insn)\n{\n') | |
1629 | ||
1630 | i4 = str_indent(4) | |
568ae7ef | 1631 | |
82bfac1c RH |
1632 | if len(allpatterns) != 0: |
1633 | output(i4, 'union {\n') | |
1634 | for n in sorted(arguments.keys()): | |
1635 | f = arguments[n] | |
1636 | output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') | |
1637 | output(i4, '} u;\n\n') | |
08561fc1 | 1638 | toppat.output_code(4, False, 0, 0) |
568ae7ef | 1639 | |
82bfac1c | 1640 | output(i4, 'return false;\n') |
568ae7ef RH |
1641 | output('}\n') |
1642 | ||
70e0711a RH |
1643 | if variablewidth: |
1644 | output('\n', decode_scope, insntype, ' ', decode_function, | |
1645 | '_load(DisasContext *ctx)\n{\n', | |
1646 | ' ', insntype, ' insn = 0;\n\n') | |
1647 | stree.output_code(4, 0, 0, 0) | |
1648 | output('}\n') | |
1649 | ||
568ae7ef RH |
1650 | if output_file: |
1651 | output_fd.close() | |
9b5acc56 | 1652 | exit(1 if testforerror else 0) |
568ae7ef RH |
1653 | # end main |
1654 | ||
1655 | ||
1656 | if __name__ == '__main__': | |
1657 | main() |