2 # Copyright (c) 2018 Linaro Limited
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2 of the License, or (at your option) any later version.
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 # Generate a decoding tree from a specification file.
20 # See the syntax and semantics in docs/devel/decodetree.rst.
36 translate_prefix
= 'trans'
37 translate_scope
= 'static '
42 decode_function
= 'decode'
44 re_ident
= '[a-zA-Z][a-zA-Z0-9_]*'
47 def error_with_file(file, lineno
, *args
):
48 """Print an error message from file:line and args and exit."""
53 r
= '{0}:{1}: error:'.format(file, lineno
)
55 r
= '{0}: error:'.format(file)
62 if output_file
and output_fd
:
64 os
.remove(output_file
)
def error(lineno, *args):
    """Report an error at LINENO of the current input file.

    ARGS are the message fragments.  They are forwarded to
    error_with_file() as separate positional arguments, matching its
    *args signature; passing the tuple itself would hand it over as one
    single argument.
    """
    error_with_file(input_file, lineno, *args)
76 if sys
.version_info
>= (3, 4):
77 re_fullmatch
= re
.fullmatch
def re_fullmatch(pat, str):
    """Fallback for re.fullmatch: match PAT against the whole of STR.

    Return the match object, or None if STR does not match in full.
    """
    # Wrap PAT in a non-capturing group so that a top-level '|' cannot
    # escape the anchoring, and use \Z rather than '$' because '$' also
    # matches just before a trailing newline.  re.match already anchors
    # at the start of the string.
    return re.match('(?:' + pat + r')\Z', str)
84 output('/* This file is autogenerated by scripts/decodetree.py. */\n\n')
88 """Return a string with C spaces"""
92 def str_fields(fields
):
93 """Return a string uniquely identifying FIELDS"""
95 for n
in sorted(fields
.keys()):
100 def str_match_bits(bits
, mask
):
101 """Return a string pretty-printing BITS/MASK"""
104 i
= 1 << (insnwidth
- 1)
122 """Return true iff X is equal to a power of 2."""
123 return (x
& (x
- 1)) == 0
127 """Return the number of times 2 factors into X."""
129 while ((x
>> r
) & 1) == 0:
134 def is_contiguous(bits
):
136 if is_pow2((bits
>> shift
) + 1):
142 def eq_fields_for_args(flds_a
, flds_b
):
143 if len(flds_a
) != len(flds_b
):
145 for k
, a
in flds_a
.items():
151 def eq_fields_for_fmts(flds_a
, flds_b
):
152 if len(flds_a
) != len(flds_b
):
154 for k
, a
in flds_a
.items():
158 if a
.__class
__ != b
.__class
__ or a
!= b
:
164 """Class representing a simple instruction field"""
165 def __init__(self
, sign
, pos
, len):
169 self
.mask
= ((1 << len) - 1) << pos
176 return str(self
.pos
) + ':' + s
+ str(self
.len)
178 def str_extract(self
):
183 return '{0}(insn, {1}, {2})'.format(extr
, self
.pos
, self
.len)
def __eq__(self, other):
    """Return True if OTHER has the same signedness and the same mask."""
    # self.mask is built from pos and len in __init__, so comparing it
    # covers both the position and the width of the field.
    return self.sign == other.sign and self.mask == other.mask
def __ne__(self, other):
    """Return the logical negation of __eq__ applied to OTHER."""
    same = self.__eq__(other)
    return not same
194 """Class representing a compound instruction field"""
195 def __init__(self
, subs
, mask
):
197 self
.sign
= subs
[0].sign
201 return str(self
.subs
)
203 def str_extract(self
):
206 for f
in reversed(self
.subs
):
208 ret
= f
.str_extract()
210 ret
= 'deposit32({0}, {1}, {2}, {3})' \
211 .format(ret
, pos
, 32 - pos
, f
.str_extract())
215 def __ne__(self
, other
):
216 if len(self
.subs
) != len(other
.subs
):
218 for a
, b
in zip(self
.subs
, other
.subs
):
219 if a
.__class
__ != b
.__class
__ or a
!= b
:
def __eq__(self, other):
    """Return the logical negation of __ne__ applied to OTHER."""
    differs = self.__ne__(other)
    return not differs
229 """Class representing an argument field with constant value"""
230 def __init__(self
, value
):
233 self
.sign
= value
< 0
236 return str(self
.value
)
def str_extract(self):
    """Return the text form of the stored value."""
    text = str(self.value)
    return text
def __cmp__(self, other):
    """Three-way comparison by value (only consulted by Python 2)."""
    return self.value - other.value

# Python 3 never calls __cmp__, so without the rich comparisons below
# two constants holding the same value would compare by identity.
def __eq__(self, other):
    """Return True if OTHER holds the same constant value."""
    return self.value == other.value

def __ne__(self, other):
    """Return True if OTHER holds a different constant value."""
    return not self.__eq__(other)

# Defining __eq__ clears the inherited __hash__ on Python 3; provide one
# that agrees with equality so instances remain hashable.
def __hash__(self):
    """Return a hash derived from the constant value."""
    return hash(self.value)
247 """Class representing a field passed through an expander"""
248 def __init__(self
, func
, base
):
249 self
.mask
= base
.mask
250 self
.sign
= base
.sign
255 return self
.func
+ '(' + str(self
.base
) + ')'
def str_extract(self):
    """Return the base field's extraction wrapped in a call to self.func."""
    inner = self.base.str_extract()
    return ''.join((self.func, '(', inner, ')'))
def __eq__(self, other):
    """Return whether OTHER has the same func and an equal base."""
    same_func = self.func == other.func
    if not same_func:
        # Mirror the short-circuit of 'and': hand back the first result.
        return same_func
    return self.base == other.base
def __ne__(self, other):
    """Return the logical negation of __eq__ applied to OTHER."""
    same = self.__eq__(other)
    return not same
269 """Class representing the extracted fields of a format"""
270 def __init__(self
, nm
, flds
, extern
):
273 self
.fields
= sorted(flds
)
276 return self
.name
+ ' ' + str(self
.fields
)
def struct_name(self):
    """Return self.name prefixed with 'arg_'."""
    return ''.join(('arg_', self.name))
281 def output_def(self
):
283 output('typedef struct {\n')
284 for n
in self
.fields
:
285 output(' int ', n
, ';\n')
286 output('} ', self
.struct_name(), ';\n\n')
291 """Common code between instruction formats and instruction patterns"""
292 def __init__(self
, name
, lineno
, base
, fixb
, fixm
, udfm
, fldm
, flds
):
294 self
.file = input_file
297 self
.fixedbits
= fixb
298 self
.fixedmask
= fixm
299 self
.undefmask
= udfm
300 self
.fieldmask
= fldm
304 return self
.name
+ ' ' + str_match_bits(self
.fixedbits
, self
.fixedmask
)
307 return str_indent(i
) + self
.__str
__()
311 class Format(General
):
312 """Class representing an instruction format"""
def extract_name(self):
    """Return self.name prefixed with 'extract_'."""
    return ''.join(('extract_', self.name))
317 def output_extract(self
):
318 output('static void ', self
.extract_name(), '(',
319 self
.base
.struct_name(), ' *a, ', insntype
, ' insn)\n{\n')
320 for n
, f
in self
.fields
.items():
321 output(' a->', n
, ' = ', f
.str_extract(), ';\n')
326 class Pattern(General
):
327 """Class representing an instruction pattern"""
def output_decl(self):
    """Emit the typedef and the translate-function prototype for this pattern.

    The typedef aliases the struct name of the argument set
    (self.base.base) as arg_<name>; the prototype is built from the
    module-level translate_scope and translate_prefix.
    """
    # The module globals are only read here, so no 'global' statement
    # is required.
    name = self.name
    struct = self.base.base.struct_name()
    output('typedef ', struct, ' arg_', name, ';\n')
    output(translate_scope, 'bool ', translate_prefix, '_', name,
           '(DisasContext *ctx, arg_', name, ' *a);\n')
337 def output_code(self
, i
, extracted
, outerbits
, outermask
):
338 global translate_prefix
340 arg
= self
.base
.base
.name
341 output(ind
, '/* ', self
.file, ':', str(self
.lineno
), ' */\n')
343 output(ind
, self
.base
.extract_name(), '(&u.f_', arg
, ', insn);\n')
344 for n
, f
in self
.fields
.items():
345 output(ind
, 'u.f_', arg
, '.', n
, ' = ', f
.str_extract(), ';\n')
346 output(ind
, 'if (', translate_prefix
, '_', self
.name
,
347 '(ctx, &u.f_', arg
, ')) return true;\n')
351 class MultiPattern(General
):
352 """Class representing an overlapping set of instruction patterns"""
354 def __init__(self
, lineno
, pats
, fixb
, fixm
, udfm
):
355 self
.file = input_file
359 self
.fixedbits
= fixb
360 self
.fixedmask
= fixm
361 self
.undefmask
= udfm
369 def output_decl(self
):
373 def output_code(self
, i
, extracted
, outerbits
, outermask
):
374 global translate_prefix
377 if outermask
!= p
.fixedmask
:
378 innermask
= p
.fixedmask
& ~outermask
379 innerbits
= p
.fixedbits
& ~outermask
380 output(ind
, 'if ((insn & ',
381 '0x{0:08x}) == 0x{1:08x}'.format(innermask
, innerbits
),
384 str_match_bits(p
.fixedbits
, p
.fixedmask
), ' */\n')
385 p
.output_code(i
+ 4, extracted
, p
.fixedbits
, p
.fixedmask
)
388 p
.output_code(i
, extracted
, p
.fixedbits
, p
.fixedmask
)
392 def parse_field(lineno
, name
, toks
):
393 """Parse one instruction field from TOKS at LINENO"""
398 # A "simple" field will have only one entry;
399 # a "multifield" will have several.
404 if re_fullmatch('!function=' + re_ident
, t
):
406 error(lineno
, 'duplicate function')
411 if re_fullmatch('[0-9]+:s[0-9]+', t
):
412 # Signed field extract
413 subtoks
= t
.split(':s')
415 elif re_fullmatch('[0-9]+:[0-9]+', t
):
416 # Unsigned field extract
417 subtoks
= t
.split(':')
420 error(lineno
, 'invalid field token "{0}"'.format(t
))
423 if po
+ le
> insnwidth
:
424 error(lineno
, 'field {0} too large'.format(t
))
425 f
= Field(sign
, po
, le
)
429 if width
> insnwidth
:
430 error(lineno
, 'field too large')
437 error(lineno
, 'field components overlap')
439 f
= MultiField(subs
, mask
)
441 f
= FunctionField(func
, f
)
444 error(lineno
, 'duplicate field', name
)
449 def parse_arguments(lineno
, name
, toks
):
450 """Parse one argument set from TOKS at LINENO"""
457 if re_fullmatch('!extern', t
):
460 if not re_fullmatch(re_ident
, t
):
461 error(lineno
, 'invalid argument set token "{0}"'.format(t
))
463 error(lineno
, 'duplicate argument "{0}"'.format(t
))
466 if name
in arguments
:
467 error(lineno
, 'duplicate argument set', name
)
468 arguments
[name
] = Arguments(name
, flds
, extern
)
469 # end parse_arguments
472 def lookup_field(lineno
, name
):
476 error(lineno
, 'undefined field', name
)
479 def add_field(lineno
, flds
, new_name
, f
):
481 error(lineno
, 'duplicate field', new_name
)
def add_field_byname(lineno, flds, new_name, old_name):
    """Look up the field OLD_NAME and add it to FLDS under NEW_NAME.

    Returns whatever add_field() returns.
    """
    existing = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, existing)
490 def infer_argument_set(flds
):
492 global decode_function
494 for arg
in arguments
.values():
495 if eq_fields_for_args(flds
, arg
.fields
):
498 name
= decode_function
+ str(len(arguments
))
499 arg
= Arguments(name
, flds
.keys(), False)
500 arguments
[name
] = arg
504 def infer_format(arg
, fieldmask
, flds
):
507 global decode_function
511 for n
, c
in flds
.items():
517 # Look for an existing format with the same argument set and fields
518 for fmt
in formats
.values():
519 if arg
and fmt
.base
!= arg
:
521 if fieldmask
!= fmt
.fieldmask
:
523 if not eq_fields_for_fmts(flds
, fmt
.fields
):
525 return (fmt
, const_flds
)
527 name
= decode_function
+ '_Fmt_' + str(len(formats
))
529 arg
= infer_argument_set(flds
)
531 fmt
= Format(name
, 0, arg
, 0, 0, 0, fieldmask
, var_flds
)
534 return (fmt
, const_flds
)
538 def parse_generic(lineno
, is_format
, name
, toks
):
539 """Parse one instruction format from TOKS at LINENO"""
557 # '&Foo' gives a format an explicit argument set.
561 error(lineno
, 'multiple argument sets')
565 error(lineno
, 'undefined argument set', t
)
568 # '@Foo' gives a pattern an explicit format.
572 error(lineno
, 'multiple formats')
576 error(lineno
, 'undefined format', t
)
579 # '%Foo' imports a field.
582 flds
= add_field_byname(lineno
, flds
, tt
, tt
)
585 # 'Foo=%Bar' imports a field with a different name.
586 if re_fullmatch(re_ident
+ '=%' + re_ident
, t
):
587 (fname
, iname
) = t
.split('=%')
588 flds
= add_field_byname(lineno
, flds
, fname
, iname
)
591 # 'Foo=number' sets an argument field to a constant value
592 if re_fullmatch(re_ident
+ '=[0-9]+', t
):
593 (fname
, value
) = t
.split('=')
595 flds
= add_field(lineno
, flds
, fname
, ConstField(value
))
598 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
599 # required ones, or dont-cares.
600 if re_fullmatch('[01.-]+', t
):
602 fms
= t
.replace('0', '1')
603 fms
= fms
.replace('.', '0')
604 fms
= fms
.replace('-', '0')
605 fbs
= t
.replace('.', '0')
606 fbs
= fbs
.replace('-', '0')
607 ubm
= t
.replace('1', '0')
608 ubm
= ubm
.replace('.', '0')
609 ubm
= ubm
.replace('-', '1')
613 fixedbits
= (fixedbits
<< shift
) | fbs
614 fixedmask
= (fixedmask
<< shift
) | fms
615 undefmask
= (undefmask
<< shift
) | ubm
616 # Otherwise, fieldname:fieldwidth
617 elif re_fullmatch(re_ident
+ ':s?[0-9]+', t
):
618 (fname
, flen
) = t
.split(':')
623 shift
= int(flen
, 10)
624 f
= Field(sign
, insnwidth
- width
- shift
, shift
)
625 flds
= add_field(lineno
, flds
, fname
, f
)
630 error(lineno
, 'invalid token "{0}"'.format(t
))
633 # We should have filled in all of the bits of the instruction.
634 if not (is_format
and width
== 0) and width
!= insnwidth
:
635 error(lineno
, 'definition has {0} bits'.format(width
))
637 # Do not check for fields overlapping fields; one valid usage
638 # is to be able to duplicate fields via import.
640 for f
in flds
.values():
643 # Fix up what we've parsed to match either a format or a pattern.
645 # Formats cannot reference formats.
647 error(lineno
, 'format referencing format')
648 # If an argument set is given, then there should be no fields
649 # without a place to store it.
651 for f
in flds
.keys():
652 if f
not in arg
.fields
:
653 error(lineno
, 'field {0} not in argument set {1}'
654 .format(f
, arg
.name
))
656 arg
= infer_argument_set(flds
)
658 error(lineno
, 'duplicate format name', name
)
659 fmt
= Format(name
, lineno
, arg
, fixedbits
, fixedmask
,
660 undefmask
, fieldmask
, flds
)
663 # Patterns can reference a format ...
665 # ... but not an argument simultaneously
667 error(lineno
, 'pattern specifies both format and argument set')
668 if fixedmask
& fmt
.fixedmask
:
669 error(lineno
, 'pattern fixed bits overlap format fixed bits')
670 fieldmask |
= fmt
.fieldmask
671 fixedbits |
= fmt
.fixedbits
672 fixedmask |
= fmt
.fixedmask
673 undefmask |
= fmt
.undefmask
675 (fmt
, flds
) = infer_format(arg
, fieldmask
, flds
)
677 for f
in flds
.keys():
678 if f
not in arg
.fields
:
679 error(lineno
, 'field {0} not in argument set {1}'
680 .format(f
, arg
.name
))
681 if f
in fmt
.fields
.keys():
682 error(lineno
, 'field {0} set by format and pattern'.format(f
))
684 if f
not in flds
.keys() and f
not in fmt
.fields
.keys():
685 error(lineno
, 'field {0} not initialized'.format(f
))
686 pat
= Pattern(name
, lineno
, fmt
, fixedbits
, fixedmask
,
687 undefmask
, fieldmask
, flds
)
689 allpatterns
.append(pat
)
691 # Validate the masks that we have assembled.
692 if fieldmask
& fixedmask
:
693 error(lineno
, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
694 .format(fieldmask
, fixedmask
))
695 if fieldmask
& undefmask
:
696 error(lineno
, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
697 .format(fieldmask
, undefmask
))
698 if fixedmask
& undefmask
:
699 error(lineno
, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
700 .format(fixedmask
, undefmask
))
702 allbits
= fieldmask | fixedmask | undefmask
703 if allbits
!= insnmask
:
704 error(lineno
, 'bits left unspecified (0x{0:08x})'
705 .format(allbits ^ insnmask
))
708 def build_multi_pattern(lineno
, pats
):
709 """Validate the Patterns going into a MultiPattern."""
714 error(lineno
, 'less than two patterns within braces')
719 # Collect fixed/undefmask for all of the children.
720 # Move the defining lineno back to that of the first child.
722 fixedmask
&= p
.fixedmask
723 undefmask
&= p
.undefmask
724 if p
.lineno
< lineno
:
730 error(lineno
, 'no overlap in patterns within braces')
733 thisbits
= p
.fixedbits
& fixedmask
734 if fixedbits
is None:
736 elif fixedbits
!= thisbits
:
737 fixedmask
&= ~
(fixedbits ^ thisbits
)
742 mp
= MultiPattern(lineno
, pats
, fixedbits
, fixedmask
, undefmask
)
744 # end build_multi_pattern
747 """Parse all of the patterns within a file"""
751 # Read all of the lines of the file. Concatenate lines
752 # ending in backslash; discard empty lines and comments.
761 # Expand and strip spaces, to find indent.
763 line
= line
.expandtabs()
775 # Next line after continuation
778 # Allow completely blank lines.
782 # Empty line due to comment.
784 # Indentation must be correct, even for comment lines.
785 if indent
!= nesting
:
786 error(lineno
, 'indentation ', indent
, ' != ', nesting
)
788 start_lineno
= lineno
802 error(start_lineno
, 'mismatched close brace')
804 error(start_lineno
, 'extra tokens after close brace')
806 if indent
!= nesting
:
807 error(start_lineno
, 'indentation ', indent
, ' != ', nesting
)
809 patterns
= saved_pats
.pop()
810 build_multi_pattern(lineno
, pats
)
814 # Everything else should have current indentation.
815 if indent
!= nesting
:
816 error(start_lineno
, 'indentation ', indent
, ' != ', nesting
)
821 error(start_lineno
, 'extra tokens after open brace')
822 saved_pats
.append(patterns
)
828 # Determine the type of object needing to be parsed.
830 parse_field(start_lineno
, name
[1:], toks
)
832 parse_arguments(start_lineno
, name
[1:], toks
)
834 parse_generic(start_lineno
, True, name
[1:], toks
)
836 parse_generic(start_lineno
, False, name
, toks
)
842 """Class representing a node in a decode tree"""
844 def __init__(self
, fm
, tm
):
852 r
= '{0}{1:08x}'.format(ind
, self
.fixedmask
)
854 r
+= ' ' + self
.format
.name
856 for (b
, s
) in self
.subs
:
857 r
+= '{0} {1:08x}:\n'.format(ind
, b
)
858 r
+= s
.str1(i
+ 4) + '\n'
865 def output_code(self
, i
, extracted
, outerbits
, outermask
):
868 # If we identified all nodes below have the same format,
869 # extract the fields now.
870 if not extracted
and self
.base
:
871 output(ind
, self
.base
.extract_name(),
872 '(&u.f_', self
.base
.base
.name
, ', insn);\n')
875 # Attempt to aid the compiler in producing compact switch statements.
876 # If the bits in the mask are contiguous, extract them.
877 sh
= is_contiguous(self
.thismask
)
879 # Propagate SH down into the local functions.
880 def str_switch(b
, sh
=sh
):
881 return '(insn >> {0}) & 0x{1:x}'.format(sh
, b
>> sh
)
883 def str_case(b
, sh
=sh
):
884 return '0x{0:x}'.format(b
>> sh
)
887 return 'insn & 0x{0:08x}'.format(b
)
890 return '0x{0:08x}'.format(b
)
892 output(ind
, 'switch (', str_switch(self
.thismask
), ') {\n')
893 for b
, s
in sorted(self
.subs
):
894 assert (self
.thismask
& ~s
.fixedmask
) == 0
895 innermask
= outermask | self
.thismask
896 innerbits
= outerbits | b
897 output(ind
, 'case ', str_case(b
), ':\n')
899 str_match_bits(innerbits
, innermask
), ' */\n')
900 s
.output_code(i
+ 4, extracted
, innerbits
, innermask
)
901 output(ind
, ' return false;\n')
906 def build_tree(pats
, outerbits
, outermask
):
907 # Find the intersection of all remaining fixedmask.
908 innermask
= ~outermask
& insnmask
910 innermask
&= i
.fixedmask
913 text
= 'overlapping patterns:'
915 text
+= '\n' + p
.file + ':' + str(p
.lineno
) + ': ' + str(p
)
916 error_with_file(pats
[0].file, pats
[0].lineno
, text
)
918 fullmask
= outermask | innermask
920 # Sort each element of pats into the bin selected by the mask.
923 fb
= i
.fixedbits
& innermask
929 # We must recurse if any bin has more than one element or if
930 # the single element in the bin has not been fully matched.
931 t
= Tree(fullmask
, innermask
)
933 for b
, l
in bins
.items():
935 if len(l
) > 1 or s
.fixedmask
& ~fullmask
!= 0:
936 s
= build_tree(l
, b | outerbits
, fullmask
)
937 t
.subs
.append((b
, s
))
943 def prop_format(tree
):
944 """Propagate Format objects into the decode tree"""
946 # Depth first search.
947 for (b
, s
) in tree
.subs
:
948 if isinstance(s
, Tree
):
951 # If all entries in SUBS have the same format, then
952 # propagate that into the tree.
954 for (b
, s
) in tree
.subs
:
970 global translate_scope
971 global translate_prefix
978 global decode_function
980 decode_scope
= 'static '
982 long_opts
= ['decode=', 'translate=', 'output=', 'insnwidth=']
984 (opts
, args
) = getopt
.getopt(sys
.argv
[1:], 'o:w:', long_opts
)
985 except getopt
.GetoptError
as err
:
988 if o
in ('-o', '--output'):
990 elif o
== '--decode':
993 elif o
== '--translate':
996 elif o
in ('-w', '--insnwidth'):
999 insntype
= 'uint16_t'
1001 elif insnwidth
!= 32:
1002 error(0, 'cannot handle insns of width', insnwidth
)
1004 assert False, 'unhandled option'
1007 error(0, 'missing input file')
1008 for filename
in args
:
1009 input_file
= filename
1010 f
= open(filename
, 'r')
1014 t
= build_tree(patterns
, 0, 0)
1018 output_fd
= open(output_file
, 'w')
1020 output_fd
= sys
.stdout
1023 for n
in sorted(arguments
.keys()):
1027 # A single translate function can be invoked for different patterns.
1028 # Make sure that the argument sets are the same, and declare the
1029 # function only once.
1031 for i
in allpatterns
:
1032 if i
.name
in out_pats
:
1033 p
= out_pats
[i
.name
]
1034 if i
.base
.base
!= p
.base
.base
:
1035 error(0, i
.name
, ' has conflicting argument sets')
1038 out_pats
[i
.name
] = i
1041 for n
in sorted(formats
.keys()):
1045 output(decode_scope
, 'bool ', decode_function
,
1046 '(DisasContext *ctx, ', insntype
, ' insn)\n{\n')
1049 output(i4
, 'union {\n')
1050 for n
in sorted(arguments
.keys()):
1052 output(i4
, i4
, f
.struct_name(), ' f_', f
.name
, ';\n')
1053 output(i4
, '} u;\n\n')
1055 t
.output_code(4, False, 0, 0)
1056 output(i4
, 'return false;\n')
1065 if __name__
== '__main__':