3 # Copyright 2007 Neal Norwitz
4 # Portions Copyright 2007 Google Inc.
6 # Licensed under the Apache License, Version 2.0 (the "License");
7 # you may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
10 # http://www.apache.org/licenses/LICENSE-2.0
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS,
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 # See the License for the specific language governing permissions and
16 # limitations under the License.
18 """Generate an Abstract Syntax Tree (AST) for C++."""
21 # * Tokens should never be exported, need to convert to Nodes
22 # (return types, parameters, etc.)
23 # * Handle static class data for templatized classes
24 # * Handle casts (both C++ and C-style)
25 # * Handle conditions and loops (if/else, switch, for, while/do)
27 # TODO much, much later:
37 import __builtin__
as builtins
43 from cpp
import keywords
44 from cpp
import tokenize
48 if not hasattr(builtins
, 'reversed'):
49 # Support Python 2.3 and earlier.
51 for i
in range(len(seq
)-1, -1, -1):
54 if not hasattr(builtins
, 'next'):
55 # Support Python 2.5 and earlier.
60 VISIBILITY_PUBLIC
, VISIBILITY_PROTECTED
, VISIBILITY_PRIVATE
= range(3)
64 FUNCTION_VIRTUAL
= 0x02
65 FUNCTION_PURE_VIRTUAL
= 0x04
68 FUNCTION_ATTRIBUTE
= 0x20
69 FUNCTION_UNKNOWN_ANNOTATION
= 0x40
71 FUNCTION_OVERRIDE
= 0x100
74 These are currently unused. Should really handle these properly at some point.
76 TYPE_MODIFIER_INLINE = 0x010000
77 TYPE_MODIFIER_EXTERN = 0x020000
78 TYPE_MODIFIER_STATIC = 0x040000
79 TYPE_MODIFIER_CONST = 0x080000
80 TYPE_MODIFIER_REGISTER = 0x100000
81 TYPE_MODIFIER_VOLATILE = 0x200000
82 TYPE_MODIFIER_MUTABLE = 0x400000
85 'inline': TYPE_MODIFIER_INLINE,
86 'extern': TYPE_MODIFIER_EXTERN,
87 'static': TYPE_MODIFIER_STATIC,
88 'const': TYPE_MODIFIER_CONST,
89 'register': TYPE_MODIFIER_REGISTER,
90 'volatile': TYPE_MODIFIER_VOLATILE,
91 'mutable': TYPE_MODIFIER_MUTABLE,
95 _INTERNAL_TOKEN
= 'internal'
96 _NAMESPACE_POP
= 'ns-pop'
99 # TODO(nnorwitz): use this as a singleton for templated_types, etc
100 # where we don't want to create a new empty dict each time. It is also const.
101 class _NullDict(object):
102 __contains__
= lambda self
: False
103 keys
= values
= items
= iterkeys
= itervalues
= iteritems
= lambda self
: ()
106 # TODO(nnorwitz): move AST nodes into a separate module.
110 def __init__(self
, start
, end
):
114 def IsDeclaration(self
):
115 """Returns bool if this node is a declaration."""
118 def IsDefinition(self
):
119 """Returns bool if this node is a definition."""
122 def IsExportable(self
):
123 """Returns bool if this node is exportable from a header file."""
126 def Requires(self
, node
):
127 """Does this AST node require the definition of the node passed in?"""
def XXX__str__(self):
    """Format this node through ``_StringHelper`` with an empty suffix.

    The label handed to the helper is the runtime class name of ``self``.

    Returns:
      Whatever ``self._StringHelper(class_name, '')`` returns.
    """
    class_name = self.__class__.__name__
    return self._StringHelper(class_name, '')
133 def _StringHelper(self
, name
, suffix
):
135 return '%s(%s)' % (name
, suffix
)
136 return '%s(%d, %d, %s)' % (name
, self
.start
, self
.end
, suffix
)
143 def __init__(self
, start
, end
, name
, definition
):
144 Node
.__init
__(self
, start
, end
)
146 self
.definition
= definition
149 value
= '%s %s' % (self
.name
, self
.definition
)
150 return self
._StringHelper
(self
.__class
__.__name
__, value
)
154 def __init__(self
, start
, end
, filename
, system
):
155 Node
.__init
__(self
, start
, end
)
156 self
.filename
= filename
163 return self
._StringHelper
(self
.__class
__.__name
__, fmt
% self
.filename
)
167 def __init__(self
, start
, end
, label
):
168 Node
.__init
__(self
, start
, end
)
172 return self
._StringHelper
(self
.__class
__.__name
__, str(self
.label
))
176 def __init__(self
, start
, end
, expr
):
177 Node
.__init
__(self
, start
, end
)
180 def Requires(self
, node
):
181 # TODO(nnorwitz): impl.
185 return self
._StringHelper
(self
.__class
__.__name
__, str(self
.expr
))
def __init__(self, start, end, expr, namespace):
    """Initialize the expression node and remember its namespace.

    Args:
      start: forwarded unchanged to ``Expr.__init__``.
      end: forwarded unchanged to ``Expr.__init__``.
      expr: forwarded unchanged to ``Expr.__init__``.
      namespace: sliceable sequence; a slice copy (``namespace[:]``) is
        stored rather than the caller's own object.
    """
    Expr.__init__(self, start, end, expr)
    # Snapshot the sequence so the stored value is a distinct object.
    namespace_snapshot = namespace[:]
    self.namespace = namespace_snapshot
203 def __init__(self
, start
, end
, names
):
204 Node
.__init
__(self
, start
, end
)
208 return self
._StringHelper
(self
.__class
__.__name
__, str(self
.names
))
211 class Parameter(Node
):
212 def __init__(self
, start
, end
, name
, parameter_type
, default
):
213 Node
.__init
__(self
, start
, end
)
215 self
.type = parameter_type
216 self
.default
= default
def Requires(self, node):
    """Tell whether this parameter's type has the same name as ``node``.

    Only the bare ``name`` attributes are compared.

    Args:
      node: object exposing a ``name`` attribute.

    Returns:
      The result of ``self.type.name == node.name``.
    """
    # TODO(nnorwitz): handle namespaces, etc.
    declared_type_name = self.type.name
    candidate_name = node.name
    return declared_type_name == candidate_name
223 name
= str(self
.type)
224 suffix
= '%s %s' % (name
, self
.name
)
226 suffix
+= ' = ' + ''.join([d
.name
for d
in self
.default
])
227 return self
._StringHelper
(self
.__class
__.__name
__, suffix
)
230 class _GenericDeclaration(Node
):
231 def __init__(self
, start
, end
, name
, namespace
):
232 Node
.__init
__(self
, start
, end
)
234 self
.namespace
= namespace
[:]
238 if self
.namespace
and self
.namespace
[-1]:
239 prefix
= '::'.join(self
.namespace
) + '::'
240 return prefix
+ self
.name
242 def _TypeStringHelper(self
, suffix
):
244 names
= [n
or '<anonymous>' for n
in self
.namespace
]
245 suffix
+= ' in ' + '::'.join(names
)
246 return self
._StringHelper
(self
.__class
__.__name
__, suffix
)
249 # TODO(nnorwitz): merge with Parameter in some way?
250 class VariableDeclaration(_GenericDeclaration
):
251 def __init__(self
, start
, end
, name
, var_type
, initial_value
, namespace
):
252 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
254 self
.initial_value
= initial_value
def Requires(self, node):
    """Tell whether this variable's type has the same name as ``node``.

    Only the bare ``name`` attributes are compared.

    Args:
      node: object exposing a ``name`` attribute.

    Returns:
      The result of ``self.type.name == node.name``.
    """
    # TODO(nnorwitz): handle namespaces, etc.
    variable_type_name = self.type.name
    other_name = node.name
    return variable_type_name == other_name
261 """Return a string that tries to reconstitute the variable decl."""
262 suffix
= '%s %s' % (self
.type, self
.name
)
263 if self
.initial_value
:
264 suffix
+= ' = ' + self
.initial_value
268 return self
._StringHelper
(self
.__class
__.__name
__, self
.ToString())
271 class Typedef(_GenericDeclaration
):
272 def __init__(self
, start
, end
, name
, alias
, namespace
):
273 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
276 def IsDefinition(self
):
279 def IsExportable(self
):
282 def Requires(self
, node
):
283 # TODO(nnorwitz): handle namespaces, etc.
285 for token
in self
.alias
:
286 if token
is not None and name
== token
.name
:
291 suffix
= '%s, %s' % (self
.name
, self
.alias
)
292 return self
._TypeStringHelper
(suffix
)
295 class _NestedType(_GenericDeclaration
):
296 def __init__(self
, start
, end
, name
, fields
, namespace
):
297 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
300 def IsDefinition(self
):
303 def IsExportable(self
):
307 suffix
= '%s, {%s}' % (self
.name
, self
.fields
)
308 return self
._TypeStringHelper
(suffix
)
311 class Union(_NestedType
):
315 class Enum(_NestedType
):
319 class Class(_GenericDeclaration
):
320 def __init__(self
, start
, end
, name
, bases
, templated_types
, body
, namespace
):
321 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
324 self
.templated_types
= templated_types
def IsDeclaration(self):
    """Return True only when neither bases nor a body were recorded.

    Both ``self.bases`` and ``self.body`` must be ``None``; any other
    value for either attribute yields False.
    """
    if self.bases is not None:
        return False
    return self.body is None
def IsDefinition(self):
    """Return the logical negation of ``self.IsDeclaration()``."""
    declaration_only = self.IsDeclaration()
    return not declaration_only
def IsExportable(self):
    """Return True exactly when ``self.IsDeclaration()`` is falsy."""
    declaration_only = self.IsDeclaration()
    return not declaration_only
335 def Requires(self
, node
):
336 # TODO(nnorwitz): handle namespaces, etc.
338 for token_list
in self
.bases
:
339 # TODO(nnorwitz): bases are tokens, do name comparison.
340 for token
in token_list
:
341 if token
.name
== node
.name
:
343 # TODO(nnorwitz): search in body too.
348 if self
.templated_types
:
349 name
+= '<%s>' % self
.templated_types
350 suffix
= '%s, %s, %s' % (name
, self
.bases
, self
.body
)
351 return self
._TypeStringHelper
(suffix
)
358 class Function(_GenericDeclaration
):
359 def __init__(self
, start
, end
, name
, return_type
, parameters
,
360 modifiers
, templated_types
, body
, namespace
):
361 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
362 converter
= TypeConverter(namespace
)
363 self
.return_type
= converter
.CreateReturnType(return_type
)
364 self
.parameters
= converter
.ToParameters(parameters
)
365 self
.modifiers
= modifiers
367 self
.templated_types
= templated_types
def IsDeclaration(self):
    """Return True when no body was recorded (``self.body is None``)."""
    body = self.body
    return body is None
def IsDefinition(self):
    """Return True when a body was recorded (``self.body is not None``).

    An empty body still counts: only ``None`` yields False.
    """
    body = self.body
    return body is not None
375 def IsExportable(self
):
376 if self
.return_type
and 'static' in self
.return_type
.modifiers
:
378 return None not in self
.namespace
380 def Requires(self
, node
):
382 # TODO(nnorwitz): parameters are tokens, do name comparison.
383 for p
in self
.parameters
:
384 if p
.name
== node
.name
:
386 # TODO(nnorwitz): search in body too.
390 # TODO(nnorwitz): add templated_types.
391 suffix
= ('%s %s(%s), 0x%02x, %s' %
392 (self
.return_type
, self
.name
, self
.parameters
,
393 self
.modifiers
, self
.body
))
394 return self
._TypeStringHelper
(suffix
)
class Method(Function):
    """A ``Function`` that additionally records an ``in_class`` value."""

    def __init__(self, start, end, name, in_class, return_type, parameters,
                 modifiers, templated_types, body, namespace):
        """Initialize the Function part, then store ``in_class``.

        Args:
          start, end, name, return_type, parameters, modifiers,
          templated_types, body, namespace: passed positionally, in that
            order, to ``Function.__init__``.
          in_class: stored unchanged on ``self.in_class``.
        """
        function_args = (start, end, name, return_type, parameters,
                         modifiers, templated_types, body, namespace)
        Function.__init__(self, *function_args)
        # TODO(nnorwitz): in_class could also be a namespace which can
        # mess up finding functions properly.
        self.in_class = in_class
407 class Type(_GenericDeclaration
):
408 """Type used for any variable (eg class, primitive, struct, etc)."""
410 def __init__(self
, start
, end
, name
, templated_types
, modifiers
,
411 reference
, pointer
, array
):
414 name: str name of main type
415 templated_types: [Class (Type?)] template type info between <>
416 modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
417 reference, pointer, array: bools
419 _GenericDeclaration
.__init
__(self
, start
, end
, name
, [])
420 self
.templated_types
= templated_types
421 if not name
and modifiers
:
422 self
.name
= modifiers
.pop()
423 self
.modifiers
= modifiers
424 self
.reference
= reference
425 self
.pointer
= pointer
431 prefix
= ' '.join(self
.modifiers
) + ' '
432 name
= str(self
.name
)
433 if self
.templated_types
:
434 name
+= '<%s>' % self
.templated_types
435 suffix
= prefix
+ name
442 return self
._TypeStringHelper
(suffix
)
444 # By definition, Is* are always False. A Type can only exist in
445 # some sort of variable declaration, parameter, or return value.
446 def IsDeclaration(self
):
449 def IsDefinition(self
):
452 def IsExportable(self
):
456 class TypeConverter(object):
def __init__(self, namespace_stack):
    """Keep a reference to the namespace stack supplied by the caller.

    Args:
      namespace_stack: stored as-is on ``self.namespace_stack``; no copy
        is made, so the caller's object is shared.
    """
    self.namespace_stack = namespace_stack
461 def _GetTemplateEnd(self
, tokens
, start
):
467 if token
.name
== '<':
469 elif token
.name
== '>':
473 return tokens
[start
:end
-1], end
475 def ToType(self
, tokens
):
476 """Convert [Token,...] to [Class(...), ] useful for base classes.
477 For example, code like class Foo : public Bar<x, y> { ... };
478 the "Bar<x, y>" portion gets converted to an AST.
485 reference
= pointer
= array
= False
487 def AddType(templated_types
):
488 # Partition tokens into name and modifier tokens.
491 for t
in name_tokens
:
492 if keywords
.IsKeyword(t
.name
):
493 modifiers
.append(t
.name
)
496 name
= ''.join(names
)
498 result
.append(Type(name_tokens
[0].start
, name_tokens
[-1].end
,
499 name
, templated_types
, modifiers
,
500 reference
, pointer
, array
))
507 if token
.name
== '<':
508 new_tokens
, new_end
= self
._GetTemplateEnd
(tokens
, i
+1)
509 AddType(self
.ToType(new_tokens
))
510 # If there is a comma after the template, we need to consume
511 # that here otherwise it becomes part of the name.
513 reference
= pointer
= array
= False
514 elif token
.name
== ',':
516 reference
= pointer
= array
= False
517 elif token
.name
== '*':
519 elif token
.name
== '&':
521 elif token
.name
== '[':
523 elif token
.name
== ']':
526 name_tokens
.append(token
)
530 # No '<' in the tokens, just a simple name and no template.
534 def DeclarationToParts(self
, parts
, needs_name_removed
):
537 if needs_name_removed
:
538 # Handle default (initial) values properly.
539 for i
, t
in enumerate(parts
):
541 default
= parts
[i
+1:]
542 name
= parts
[i
-1].name
543 if name
== ']' and parts
[i
-2].name
== '[':
544 name
= parts
[i
-3].name
549 if parts
[-1].token_type
== tokenize
.NAME
:
550 name
= parts
.pop().name
552 # TODO(nnorwitz): this is a hack that happens for code like
553 # Register(Foo<T>); where it thinks this is a function call
554 # but it's actually a declaration.
564 if keywords
.IsKeyword(p
.name
):
565 modifiers
.append(p
.name
)
567 templated_tokens
, new_end
= self
._GetTemplateEnd
(parts
, i
+1)
568 templated_types
= self
.ToType(templated_tokens
)
570 # Don't add a spurious :: to data members being initialized.
572 if next_index
< end
and parts
[next_index
].name
== '::':
574 elif p
.name
in ('[', ']', '='):
575 # These are handled elsewhere.
576 other_tokens
.append(p
)
577 elif p
.name
not in ('*', '&', '>'):
578 # Ensure that names have a space between them.
579 if (type_name
and type_name
[-1].token_type
== tokenize
.NAME
and
580 p
.token_type
== tokenize
.NAME
):
581 type_name
.append(tokenize
.Token(tokenize
.SYNTAX
, ' ', 0, 0))
584 other_tokens
.append(p
)
586 type_name
= ''.join([t
.name
for t
in type_name
])
587 return name
, type_name
, templated_types
, modifiers
, default
, other_tokens
589 def ToParameters(self
, tokens
):
594 name
= type_name
= ''
596 pointer
= reference
= array
= False
600 def AddParameter(end
):
602 del default
[0] # Remove flag.
603 parts
= self
.DeclarationToParts(type_modifiers
, True)
604 (name
, type_name
, templated_types
, modifiers
,
605 unused_default
, unused_other_tokens
) = parts
606 parameter_type
= Type(first_token
.start
, first_token
.end
,
607 type_name
, templated_types
, modifiers
,
608 reference
, pointer
, array
)
609 p
= Parameter(first_token
.start
, end
, name
,
610 parameter_type
, default
)
619 # Check for braces before templates, as we can have unmatched '<>'
620 # inside default arguments.
626 type_modifiers
.append(s
)
633 if template_count
> 0:
634 type_modifiers
.append(s
)
638 AddParameter(s
.start
)
639 name
= type_name
= ''
641 pointer
= reference
= array
= False
651 pass # Just don't add to type_modifiers.
653 # Got a default value. Add any value (None) as a flag.
658 type_modifiers
.append(s
)
659 AddParameter(tokens
[-1].end
)
662 def CreateReturnType(self
, return_type_seq
):
663 if not return_type_seq
:
665 start
= return_type_seq
[0].start
666 end
= return_type_seq
[-1].end
667 _
, name
, templated_types
, modifiers
, default
, other_tokens
= \
668 self
.DeclarationToParts(return_type_seq
, False)
669 names
= [n
.name
for n
in other_tokens
]
670 reference
= '&' in names
671 pointer
= '*' in names
673 return Type(start
, end
, name
, templated_types
, modifiers
,
674 reference
, pointer
, array
)
676 def GetTemplateIndices(self
, names
):
677 # names is a list of strings.
678 start
= names
.index('<')
681 if names
[end
] == '>':
686 class AstBuilder(object):
687 def __init__(self
, token_stream
, filename
, in_class
='', visibility
=None,
689 self
.tokens
= token_stream
690 self
.filename
= filename
691 # TODO(nnorwitz): use a better data structure (deque) for the queue.
692 # Switching directions of the "queue" improved perf by about 25%.
693 # Using a deque should be even better since we access from both sides.
694 self
.token_queue
= []
695 self
.namespace_stack
= namespace_stack
[:]
696 self
.in_class
= in_class
698 self
.in_class_name_only
= None
700 self
.in_class_name_only
= in_class
.split('::')[-1]
701 self
.visibility
= visibility
702 self
.in_function
= False
703 self
.current_token
= None
704 # Keep the state whether we are currently handling a typedef or not.
705 self
._handling
_typedef
= False
707 self
.converter
= TypeConverter(self
.namespace_stack
)
def HandleError(self, msg, token):
    """Write a one-line diagnostic about ``token`` to stderr.

    The line contains ``msg``, ``self.filename``, ``token`` and at most
    the last 20 entries of ``self.token_queue`` listed in reverse order.

    Args:
      msg: short description of what went wrong.
      token: the token being processed when the problem occurred.
    """
    # Slicing yields a fresh list, so reversing it in place leaves the
    # real queue untouched.
    recent_tokens = self.token_queue[-20:]
    recent_tokens.reverse()
    report = 'Got %s in %s @ %s %s\n' % (
        msg, self.filename, token, recent_tokens)
    sys.stderr.write(report)
716 token
= self
._GetNextToken
()
720 # Get the next token.
721 self
.current_token
= token
723 # Dispatch on the next token type.
724 if token
.token_type
== _INTERNAL_TOKEN
:
725 if token
.name
== _NAMESPACE_POP
:
726 self
.namespace_stack
.pop()
730 result
= self
._GenerateOne
(token
)
731 if result
is not None:
734 self
.HandleError('exception', token
)
def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                    ref_pointer_name_seq, templated_types, value=None):
    """Build a ``VariableDeclaration`` together with its ``Type``.

    Args:
      pos_token: object whose ``start`` and ``end`` give the position used
        for both the type and the declaration.
      name: name given to the declaration.
      type_name: name given to the type.
      type_modifiers: passed to ``Type`` as its modifiers.
      ref_pointer_name_seq: container searched for the markers '&', '*'
        and '[' to decide the reference, pointer and array flags.
      templated_types: passed to ``Type`` unchanged.
      value: initial value passed to ``VariableDeclaration`` (default None).

    Returns:
      The new ``VariableDeclaration``, created with ``self.namespace_stack``.
    """
    is_reference, is_pointer, is_array = [
        marker in ref_pointer_name_seq for marker in ('&', '*', '[')]
    start, end = pos_token.start, pos_token.end
    declared_type = Type(start, end, type_name, templated_types,
                         type_modifiers, is_reference, is_pointer, is_array)
    return VariableDeclaration(start, end, name, declared_type, value,
                               self.namespace_stack)
748 def _GenerateOne(self
, token
):
749 if token
.token_type
== tokenize
.NAME
:
750 if (keywords
.IsKeyword(token
.name
) and
751 not keywords
.IsBuiltinType(token
.name
)):
752 if token
.name
== 'enum':
753 # Pop the next token and only put it back if it's not
754 # 'class'. This allows us to support the two-token
755 # 'enum class' keyword as if it were simply 'enum'.
756 next
= self
._GetNextToken
()
757 if next
.name
!= 'class':
758 self
._AddBackToken
(next
)
760 method
= getattr(self
, 'handle_' + token
.name
)
762 elif token
.name
== self
.in_class_name_only
:
763 # The token name is the same as the class, must be a ctor if
764 # there is a paren. Otherwise, it's the return type.
765 # Peek ahead to get the next token to figure out which.
766 next
= self
._GetNextToken
()
767 self
._AddBackToken
(next
)
768 if next
.token_type
== tokenize
.SYNTAX
and next
.name
== '(':
769 return self
._GetMethod
([token
], FUNCTION_CTOR
, None, True)
770 # Fall through--handle like any other method.
772 # Handle data or function declaration/definition.
773 syntax
= tokenize
.SYNTAX
774 temp_tokens
, last_token
= \
775 self
._GetVarTokensUpToIgnoringTemplates
(syntax
,
777 temp_tokens
.insert(0, token
)
778 if last_token
.name
== '(':
779 # If there is an assignment before the paren,
780 # this is an expression, not a method.
781 expr
= bool([e
for e
in temp_tokens
if e
.name
== '='])
783 new_temp
= self
._GetTokensUpTo
(tokenize
.SYNTAX
, ';')
784 temp_tokens
.append(last_token
)
785 temp_tokens
.extend(new_temp
)
786 last_token
= tokenize
.Token(tokenize
.SYNTAX
, ';', 0, 0)
788 if last_token
.name
== '[':
789 # Handle array, this isn't a method, unless it's an operator.
790 # TODO(nnorwitz): keep the size somewhere.
791 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
792 temp_tokens
.append(last_token
)
793 if temp_tokens
[-2].name
== 'operator':
794 temp_tokens
.append(self
._GetNextToken
())
796 temp_tokens2
, last_token
= \
797 self
._GetVarTokensUpTo
(tokenize
.SYNTAX
, ';')
798 temp_tokens
.extend(temp_tokens2
)
800 if last_token
.name
== ';':
801 # Handle data, this isn't a method.
802 parts
= self
.converter
.DeclarationToParts(temp_tokens
, True)
803 (name
, type_name
, templated_types
, modifiers
, default
,
804 unused_other_tokens
) = parts
807 names
= [t
.name
for t
in temp_tokens
]
809 start
, end
= self
.converter
.GetTemplateIndices(names
)
810 names
= names
[:start
] + names
[end
:]
811 default
= ''.join([t
.name
for t
in default
])
812 return self
._CreateVariable
(t0
, name
, type_name
, modifiers
,
813 names
, templated_types
, default
)
814 if last_token
.name
== '{':
815 self
._AddBackTokens
(temp_tokens
[1:])
816 self
._AddBackToken
(last_token
)
817 method_name
= temp_tokens
[0].name
818 method
= getattr(self
, 'handle_' + method_name
, None)
820 # Must be declaring a variable.
821 # TODO(nnorwitz): handle the declaration.
824 return self
._GetMethod
(temp_tokens
, 0, None, False)
825 elif token
.token_type
== tokenize
.SYNTAX
:
826 if token
.name
== '~' and self
.in_class
:
827 # Must be a dtor (probably not in method body).
828 token
= self
._GetNextToken
()
829 # self.in_class can contain A::Name, but the dtor will only
830 # be Name. Make sure to compare against the right value.
831 if (token
.token_type
== tokenize
.NAME
and
832 token
.name
== self
.in_class_name_only
):
833 return self
._GetMethod
([token
], FUNCTION_DTOR
, None, True)
834 # TODO(nnorwitz): handle a lot more syntax.
835 elif token
.token_type
== tokenize
.PREPROCESSOR
:
836 # TODO(nnorwitz): handle more preprocessor directives.
837 # token starts with a #, so remove it and strip whitespace.
838 name
= token
.name
[1:].lstrip()
839 if name
.startswith('include'):
841 name
= name
[7:].strip()
843 # Handle #include \<newline> "header-on-second-line.h".
844 if name
.startswith('\\'):
845 name
= name
[1:].strip()
846 assert name
[0] in '<"', token
847 assert name
[-1] in '>"', token
848 system
= name
[0] == '<'
849 filename
= name
[1:-1]
850 return Include(token
.start
, token
.end
, filename
, system
)
851 if name
.startswith('define'):
853 name
= name
[6:].strip()
856 for i
, c
in enumerate(name
):
858 value
= name
[i
:].lstrip()
861 return Define(token
.start
, token
.end
, name
, value
)
862 if name
.startswith('if') and name
[2:3].isspace():
863 condition
= name
[3:].strip()
864 if condition
.startswith('0') or condition
.startswith('(0)'):
865 self
._SkipIf
0Blocks
()
def _GetTokensUpTo(self, expected_token_type, expected_token):
    """Return only the collected tokens from ``_GetVarTokensUpTo``.

    Element 0 of that method's result is returned; anything after it
    (the terminating token) is dropped.

    Args:
      expected_token_type: token type that ends the scan.
      expected_token: the single token name that ends the scan.
    """
    tokens_and_terminator = self._GetVarTokensUpTo(expected_token_type,
                                                   expected_token)
    return tokens_and_terminator[0]
871 def _GetVarTokensUpTo(self
, expected_token_type
, *expected_tokens
):
872 last_token
= self
._GetNextToken
()
874 while (last_token
.token_type
!= expected_token_type
or
875 last_token
.name
not in expected_tokens
):
876 tokens
.append(last_token
)
877 last_token
= self
._GetNextToken
()
878 return tokens
, last_token
880 # Same as _GetVarTokensUpTo, but skips over '<...>' which could contain an
882 def _GetVarTokensUpToIgnoringTemplates(self
, expected_token_type
,
884 last_token
= self
._GetNextToken
()
887 while (nesting
> 0 or
888 last_token
.token_type
!= expected_token_type
or
889 last_token
.name
not in expected_tokens
):
890 tokens
.append(last_token
)
891 last_token
= self
._GetNextToken
()
892 if last_token
.name
== '<':
894 elif last_token
.name
== '>':
896 return tokens
, last_token
898 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necessary.
def _IgnoreUpTo(self, token_type, token):
    """Call ``_GetTokensUpTo(token_type, token)`` and discard its result.

    Returns:
      None.
    """
    self._GetTokensUpTo(token_type, token)
902 def _SkipIf0Blocks(self
):
905 token
= self
._GetNextToken
()
906 if token
.token_type
!= tokenize
.PREPROCESSOR
:
909 name
= token
.name
[1:].lstrip()
910 if name
.startswith('endif'):
914 elif name
.startswith('if'):
917 def _GetMatchingChar(self
, open_paren
, close_paren
, GetNextToken
=None):
918 if GetNextToken
is None:
919 GetNextToken
= self
._GetNextToken
920 # Assumes the current token is open_paren and we will consume
921 # and return up to the close_paren.
923 token
= GetNextToken()
925 if token
.token_type
== tokenize
.SYNTAX
:
926 if token
.name
== open_paren
:
928 elif token
.name
== close_paren
:
933 token
= GetNextToken()
def _GetParameters(self):
    """Return the result of ``_GetMatchingChar`` for a '(' ... ')' pair.

    The result is passed through untouched. Callers in this file wrap it
    in ``list()`` and delete the last element as the trailing ')'.
    """
    open_paren, close_paren = '(', ')'
    return self._GetMatchingChar(open_paren, close_paren)
940 return self
._GetMatchingChar
('{', '}')
942 def _GetNextToken(self
):
944 return self
.token_queue
.pop()
946 return next(self
.tokens
)
947 except StopIteration:
950 def _AddBackToken(self
, token
):
951 if token
.whence
== tokenize
.WHENCE_STREAM
:
952 token
.whence
= tokenize
.WHENCE_QUEUE
953 self
.token_queue
.insert(0, token
)
955 assert token
.whence
== tokenize
.WHENCE_QUEUE
, token
956 self
.token_queue
.append(token
)
958 def _AddBackTokens(self
, tokens
):
960 if tokens
[-1].whence
== tokenize
.WHENCE_STREAM
:
962 token
.whence
= tokenize
.WHENCE_QUEUE
963 self
.token_queue
[:0] = reversed(tokens
)
965 assert tokens
[-1].whence
== tokenize
.WHENCE_QUEUE
, tokens
966 self
.token_queue
.extend(reversed(tokens
))
968 def GetName(self
, seq
=None):
969 """Returns ([tokens], next_token_info)."""
970 GetNextToken
= self
._GetNextToken
973 GetNextToken
= lambda: next(it
)
974 next_token
= GetNextToken()
976 last_token_was_name
= False
977 while (next_token
.token_type
== tokenize
.NAME
or
978 (next_token
.token_type
== tokenize
.SYNTAX
and
979 next_token
.name
in ('::', '<'))):
980 # Two NAMEs in a row means the identifier should terminate.
981 # It's probably some sort of variable declaration.
982 if last_token_was_name
and next_token
.token_type
== tokenize
.NAME
:
984 last_token_was_name
= next_token
.token_type
== tokenize
.NAME
985 tokens
.append(next_token
)
986 # Handle templated names.
987 if next_token
.name
== '<':
988 tokens
.extend(self
._GetMatchingChar
('<', '>', GetNextToken
))
989 last_token_was_name
= True
990 next_token
= GetNextToken()
991 return tokens
, next_token
993 def GetMethod(self
, modifiers
, templated_types
):
994 return_type_and_name
= self
._GetTokensUpTo
(tokenize
.SYNTAX
, '(')
995 assert len(return_type_and_name
) >= 1
996 return self
._GetMethod
(return_type_and_name
, modifiers
, templated_types
,
999 def _GetMethod(self
, return_type_and_name
, modifiers
, templated_types
,
1001 template_portion
= None
1003 token
= self
._GetNextToken
()
1004 assert token
.token_type
== tokenize
.SYNTAX
, token
1005 if token
.name
== '<':
1006 # Handle templatized dtors.
1007 template_portion
= [token
]
1008 template_portion
.extend(self
._GetMatchingChar
('<', '>'))
1009 token
= self
._GetNextToken
()
1010 assert token
.token_type
== tokenize
.SYNTAX
, token
1011 assert token
.name
== '(', token
1013 name
= return_type_and_name
.pop()
1014 # Handle templatized ctors.
1015 if name
.name
== '>':
1017 while return_type_and_name
[index
].name
!= '<':
1019 template_portion
= return_type_and_name
[index
:] + [name
]
1020 del return_type_and_name
[index
:]
1021 name
= return_type_and_name
.pop()
1022 elif name
.name
== ']':
1023 rt
= return_type_and_name
1024 assert rt
[-1].name
== '[', return_type_and_name
1025 assert rt
[-2].name
== 'operator', return_type_and_name
1026 name_seq
= return_type_and_name
[-2:]
1027 del return_type_and_name
[-2:]
1028 name
= tokenize
.Token(tokenize
.NAME
, 'operator[]',
1029 name_seq
[0].start
, name
.end
)
1030 # Get the open paren so _GetParameters() below works.
1031 unused_open_paren
= self
._GetNextToken
()
1033 # TODO(nnorwitz): store template_portion.
1034 return_type
= return_type_and_name
1037 indices
= return_type
[0]
1039 # Force ctor for templatized ctors.
1040 if name
.name
== self
.in_class
and not modifiers
:
1041 modifiers |
= FUNCTION_CTOR
1042 parameters
= list(self
._GetParameters
())
1043 del parameters
[-1] # Remove trailing ')'.
1045 # Handling operator() is especially weird.
1046 if name
.name
== 'operator' and not parameters
:
1047 token
= self
._GetNextToken
()
1048 assert token
.name
== '(', token
1049 parameters
= list(self
._GetParameters
())
1050 del parameters
[-1] # Remove trailing ')'.
1052 token
= self
._GetNextToken
()
1053 while token
.token_type
== tokenize
.NAME
:
1054 modifier_token
= token
1055 token
= self
._GetNextToken
()
1056 if modifier_token
.name
== 'const':
1057 modifiers |
= FUNCTION_CONST
1058 elif modifier_token
.name
== '__attribute__':
1059 # TODO(nnorwitz): handle more __attribute__ details.
1060 modifiers |
= FUNCTION_ATTRIBUTE
1061 assert token
.name
== '(', token
1062 # Consume everything between the (parens).
1063 unused_tokens
= list(self
._GetMatchingChar
('(', ')'))
1064 token
= self
._GetNextToken
()
1065 elif modifier_token
.name
== 'throw':
1066 modifiers |
= FUNCTION_THROW
1067 assert token
.name
== '(', token
1068 # Consume everything between the (parens).
1069 unused_tokens
= list(self
._GetMatchingChar
('(', ')'))
1070 token
= self
._GetNextToken
()
1071 elif modifier_token
.name
== 'override':
1072 modifiers |
= FUNCTION_OVERRIDE
1073 elif modifier_token
.name
== modifier_token
.name
.upper():
1074 # HACK(nnorwitz): assume that all upper-case names
1075 # are some macro we aren't expanding.
1076 modifiers |
= FUNCTION_UNKNOWN_ANNOTATION
1078 self
.HandleError('unexpected token', modifier_token
)
1080 assert token
.token_type
== tokenize
.SYNTAX
, token
1081 # Handle ctor initializers.
1082 if token
.name
== ':':
1083 # TODO(nnorwitz): anything else to handle for initializer list?
1084 while token
.name
!= ';' and token
.name
!= '{':
1085 token
= self
._GetNextToken
()
1087 # Handle pointer to functions that are really data but look
1088 # like method declarations.
1089 if token
.name
== '(':
1090 if parameters
[0].name
== '*':
1091 # name contains the return type.
1092 name
= parameters
.pop()
1093 # parameters contains the name of the data.
1094 modifiers
= [p
.name
for p
in parameters
]
1095 # Already at the ( to open the parameter list.
1096 function_parameters
= list(self
._GetMatchingChar
('(', ')'))
1097 del function_parameters
[-1] # Remove trailing ')'.
1098 # TODO(nnorwitz): store the function_parameters.
1099 token
= self
._GetNextToken
()
1100 assert token
.token_type
== tokenize
.SYNTAX
, token
1101 assert token
.name
== ';', token
1102 return self
._CreateVariable
(indices
, name
.name
, indices
.name
,
1103 modifiers
, '', None)
1104 # At this point, we got something like:
1105 # return_type (type::*name_)(params);
1106 # This is a data member called name_ that is a function pointer.
1107 # With this code: void (sq_type::*field_)(string&);
1108 # We get: name=void return_type=[] parameters=sq_type ... field_
1109 # TODO(nnorwitz): is return_type always empty?
1110 # TODO(nnorwitz): this isn't even close to being correct.
1111 # Just put in something so we don't crash and can move on.
1112 real_name
= parameters
[-1]
1113 modifiers
= [p
.name
for p
in self
._GetParameters
()]
1114 del modifiers
[-1] # Remove trailing ')'.
1115 return self
._CreateVariable
(indices
, real_name
.name
, indices
.name
,
1116 modifiers
, '', None)
1118 if token
.name
== '{':
1119 body
= list(self
.GetScope())
1120 del body
[-1] # Remove trailing '}'.
1123 if token
.name
== '=':
1124 token
= self
._GetNextToken
()
1126 if token
.name
== 'default' or token
.name
== 'delete':
1127 # Ignore explicitly defaulted and deleted special members
1129 token
= self
._GetNextToken
()
1131 # Handle pure-virtual declarations.
1132 assert token
.token_type
== tokenize
.CONSTANT
, token
1133 assert token
.name
== '0', token
1134 modifiers |
= FUNCTION_PURE_VIRTUAL
1135 token
= self
._GetNextToken
()
1137 if token
.name
== '[':
1138 # TODO(nnorwitz): store tokens and improve parsing.
1139 # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
1140 tokens
= list(self
._GetMatchingChar
('[', ']'))
1141 token
= self
._GetNextToken
()
1143 assert token
.name
== ';', (token
, return_type_and_name
, parameters
)
1145 # Looks like we got a method, not a function.
1146 if len(return_type
) > 2 and return_type
[-1].name
== '::':
1147 return_type
, in_class
= \
1148 self
._GetReturnTypeAndClassName
(return_type
)
1149 return Method(indices
.start
, indices
.end
, name
.name
, in_class
,
1150 return_type
, parameters
, modifiers
, templated_types
,
1151 body
, self
.namespace_stack
)
1152 return Function(indices
.start
, indices
.end
, name
.name
, return_type
,
1153 parameters
, modifiers
, templated_types
, body
,
1154 self
.namespace_stack
)
def _GetReturnTypeAndClassName(self, token_seq):
    """Split the tokens before a method name into return type and class.

    Splitting the return type from the class name in a method can be
    tricky.  For example, Return::Type::Is::Hard::To::Find().  Where is
    the return type and where is the class name?  The heuristic used is
    to pull the last name as the class name.  This includes all the
    templated type info.

    Args:
      token_seq: non-empty list of tokens that precede the method name.

    Returns:
      (return_type, class_name) where return_type is a flat list of
      tokens and class_name is the list of tokens of the last name.

    Raises:
      AssertionError: if GetName yields an empty name.
    """
    # TODO(nnorwitz): if there is only One name like in the
    # example above, punt and assume the last bit is the class name.

    # Ignore a :: prefix, if exists so we can find the first real name.
    i = 0
    if token_seq[0].name == '::':
        i = 1
    # Ignore a :: suffix, if exists.
    end = len(token_seq) - 1
    if token_seq[end-1].name == '::':
        end -= 1

    # Make a copy of the sequence so we can append a sentinel
    # value.  This is required because GetName has to have some
    # terminating condition beyond the last name.
    seq_copy = token_seq[i:end]
    seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
    names = []
    # NOTE(review): `i` indexes seq_copy here although seq_copy was
    # already sliced from `i`; with a leading '::' the first real token
    # looks like it is skipped -- confirm before changing.
    while i < end:
        # Iterate through the sequence parsing out each name.
        new_name, next_token = self.GetName(seq_copy[i:])
        assert new_name, 'Got empty new_name, next=%s' % next_token
        # We got a pointer or ref.  Add it to the name.
        if next_token and next_token.token_type == tokenize.SYNTAX:
            new_name.append(next_token)
        names.append(new_name)
        i += len(new_name)

    # Now that we have the names, it's time to undo what we did.

    # Remove the sentinel value.
    names[-1].pop()
    # Flatten the token sequence for the return type.
    return_type = [e for seq in names[:-1] for e in seq]
    # The class name is the last name.
    class_name = names[-1]
    return return_type, class_name
def handle_bool(self):
    """Placeholder handler for the `bool` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_char(self):
    """Placeholder handler for the `char` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_int(self):
    """Placeholder handler for the `int` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_long(self):
    """Placeholder handler for the `long` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_short(self):
    """Placeholder handler for the `short` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_double(self):
    """Placeholder handler for the `double` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_float(self):
    """Placeholder handler for the `float` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_void(self):
    """Placeholder handler for the `void` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_wchar_t(self):
    """Placeholder handler for the `wchar_t` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_unsigned(self):
    """Placeholder handler for the `unsigned` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_signed(self):
    """Placeholder handler for the `signed` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def _GetNestedType(self, ctor):
    """Parse the remainder of a union/enum style declaration.

    Args:
      ctor: node constructor called as
        ctor(start, end, name, fields, namespace_stack).

    Returns:
      The node built by `ctor` for a forward declaration, a typedef'd
      type or a full definition; otherwise the result of
      self._CreateVariable for a variable declared with the type.

    Raises:
      AssertionError: if a variable name was expected but not found.
    """
    name = None
    name_tokens, token = self.GetName()
    if name_tokens:
        name = ''.join([t.name for t in name_tokens])

    # Handle forward declarations.
    if token.token_type == tokenize.SYNTAX and token.name == ';':
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    if token.token_type == tokenize.NAME and self._handling_typedef:
        self._AddBackToken(token)
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    # Must be the type declaration.
    fields = list(self._GetMatchingChar('{', '}'))
    del fields[-1]                  # Remove trailing '}'.
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        next_token = self._GetNextToken()
        new_type = ctor(token.start, token.end, name, fields,
                        self.namespace_stack)
        # A name means this is an anonymous type and the name
        # is the variable declaration.
        if next_token.token_type != tokenize.NAME:
            return new_type
        name = new_type
        token = next_token

    # Must be variable declaration using the type prefixed with keyword.
    assert token.token_type == tokenize.NAME, token
    return self._CreateVariable(token, token.name, name, [], '', None)
def handle_struct(self):
    """Parse whatever follows the `struct` keyword.

    Handles three shapes before falling back to a class-style parse:
    a pointer/reference or plain variable of struct type, a function
    declared to return a struct, and a struct declaration/definition.

    Returns:
      The result of self._GetMethod, self._CreateVariable or
      self._GetClass, depending on which shape was recognised.

    Raises:
      AssertionError: if a pointer/reference declaration is not
        terminated by ';'.
    """
    # Special case the handling typedef/aliasing of structs here.
    # It would be a pain to handle in the class code.
    name_tokens, var_token = self.GetName()
    if name_tokens:
        next_token = self._GetNextToken()
        is_syntax = (var_token.token_type == tokenize.SYNTAX and
                     var_token.name[0] in '*&')
        is_variable = (var_token.token_type == tokenize.NAME and
                       next_token.name == ';')
        variable = var_token
        if is_syntax and not is_variable:
            variable = next_token
            temp = self._GetNextToken()
            if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                # Handle methods declared to return a struct.
                t0 = name_tokens[0]
                # Synthesize the already-consumed 'struct' keyword just
                # before the first name token.
                struct = tokenize.Token(tokenize.NAME, 'struct',
                                        t0.start-7, t0.start-2)
                type_and_name = [struct]
                type_and_name.extend(name_tokens)
                type_and_name.extend((var_token, next_token))
                return self._GetMethod(type_and_name, 0, None, False)
            assert temp.name == ';', (temp, name_tokens, var_token)
        if is_syntax or (is_variable and not self._handling_typedef):
            modifiers = ['struct']
            type_name = ''.join([t.name for t in name_tokens])
            position = name_tokens[0]
            return self._CreateVariable(position, variable.name, type_name,
                                        modifiers, var_token.name, None)
        # Not a variable: push everything back for the class parser.
        name_tokens.extend((var_token, next_token))
        self._AddBackTokens(name_tokens)
    else:
        self._AddBackToken(var_token)
    return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
def handle_union(self):
    """Parse a `union` declaration through the nested-type parser.

    Returns:
      Whatever self._GetNestedType returns for the Union constructor.
    """
    union_node = self._GetNestedType(Union)
    return union_node
def handle_enum(self):
    """Parse an `enum` declaration through the nested-type parser.

    Returns:
      Whatever self._GetNestedType returns for the Enum constructor.
    """
    enum_node = self._GetNestedType(Enum)
    return enum_node
def handle_auto(self):
    """Placeholder handler for the `auto` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    # TODO(nnorwitz): warn about using auto?  Probably not since it
    # will be reclaimed and useful for C++0x.
    pass
def handle_register(self):
    """Placeholder handler for the `register` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_const(self):
    """Placeholder handler for the `const` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_inline(self):
    """Placeholder handler for the `inline` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_extern(self):
    """Placeholder handler for the `extern` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_static(self):
    """Placeholder handler for the `static` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_virtual(self):
    """Parse the method declaration that follows the `virtual` keyword.

    Returns:
      The result of self.GetMethod for a virtual destructor, otherwise
      the result of self._GetMethod for a virtual method.

    Raises:
      AssertionError: if the token after `virtual` is neither a name
        nor '::'.
    """
    # What follows must be a method.
    token = token2 = self._GetNextToken()
    if token.name == 'inline':
        # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
        token2 = self._GetNextToken()
    if token2.token_type == tokenize.SYNTAX and token2.name == '~':
        # Combine the flag bits with `|`, matching how modifiers are
        # accumulated elsewhere in this file.
        return self.GetMethod(FUNCTION_VIRTUAL | FUNCTION_DTOR, None)
    assert token.token_type == tokenize.NAME or token.name == '::', token
    return_type_and_name, _ = self._GetVarTokensUpToIgnoringTemplates(
        tokenize.SYNTAX, '(')  # )
    return_type_and_name.insert(0, token)
    if token2 is not token:
        return_type_and_name.insert(1, token2)
    # NOTE(review): the trailing arguments were cut off in this copy;
    # (None, False) mirrors the _GetMethod call in handle_struct -- confirm.
    return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                           None, False)
def handle_volatile(self):
    """Placeholder handler for the `volatile` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_mutable(self):
    """Placeholder handler for the `mutable` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_public(self):
    """Record that subsequent members are public.

    Raises:
      AssertionError: if self.in_class is falsy.
    """
    assert self.in_class
    self.visibility = VISIBILITY_PUBLIC
def handle_protected(self):
    """Record that subsequent members are protected.

    Raises:
      AssertionError: if self.in_class is falsy.
    """
    assert self.in_class
    self.visibility = VISIBILITY_PROTECTED
def handle_private(self):
    """Record that subsequent members are private.

    Raises:
      AssertionError: if self.in_class is falsy.
    """
    assert self.in_class
    self.visibility = VISIBILITY_PRIVATE
def handle_friend(self):
    """Parse a `friend` declaration up to its terminating ';'.

    Returns:
      A Friend node positioned at the first token of the declaration.

    Raises:
      AssertionError: if no tokens precede the ';'.
    """
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    # Guard the tokens[0] access below.
    assert tokens
    t0 = tokens[0]
    return Friend(t0.start, t0.end, tokens, self.namespace_stack)
def handle_static_cast(self):
    """Placeholder handler for the `static_cast` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_const_cast(self):
    """Placeholder handler for the `const_cast` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_dynamic_cast(self):
    """Placeholder handler for the `dynamic_cast` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_reinterpret_cast(self):
    """Placeholder handler for the `reinterpret_cast` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_new(self):
    """Placeholder handler for the `new` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_delete(self):
    """Parse a `delete` statement up to its terminating ';'.

    Returns:
      A Delete node positioned at the first token of the expression.

    Raises:
      AssertionError: if no tokens precede the ';'.
    """
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    # Guard the tokens[0] accesses below.
    assert tokens
    return Delete(tokens[0].start, tokens[0].end, tokens)
def handle_typedef(self):
    """Parse a `typedef` declaration into a Typedef node.

    If the aliased type starts with a keyword (struct/enum/union/class),
    the matching handle_<keyword> method parses it while
    self._handling_typedef is set; the flag is always cleared afterwards.

    Returns:
      A Typedef node whose name is the alias and whose type is either
      the converted type or the raw parsed list.

    Raises:
      AssertionError: if nothing follows the `typedef` keyword.
    """
    token = self._GetNextToken()
    if (token.token_type == tokenize.NAME and
            keywords.IsKeyword(token.name)):
        # Token must be struct/enum/union/class.
        method = getattr(self, 'handle_' + token.name)
        self._handling_typedef = True
        try:
            tokens = [method()]
        finally:
            # Clear the flag even if the nested handler raises, so the
            # builder is never left stuck in typedef mode.
            self._handling_typedef = False
    else:
        tokens = [token]

    # Get the remainder of the typedef up to the semi-colon.
    tokens.extend(self._GetTokensUpTo(tokenize.SYNTAX, ';'))

    # TODO(nnorwitz): clean all this up.
    assert tokens
    name = tokens.pop()
    indices = name
    if tokens:
        indices = tokens[0]
    if not indices:
        indices = token
    if name.name == ')':
        # HACK(nnorwitz): Handle pointers to functions "properly".
        if (len(tokens) >= 4 and
                tokens[1].name == '(' and tokens[2].name == '*'):
            tokens.append(name)
            name = tokens[3]
    elif name.name == ']':
        # HACK(nnorwitz): Handle arrays properly.
        if len(tokens) >= 2:
            tokens.append(name)
            name = tokens[1]
    new_type = tokens
    if tokens and isinstance(tokens[0], tokenize.Token):
        new_type = self.converter.ToType(tokens)[0]
    return Typedef(indices.start, indices.end, name.name,
                   new_type, self.namespace_stack)
def handle_typeid(self):
    """Placeholder handler for the `typeid` keyword (not needed yet)."""
    return None
def handle_typename(self):
    """Placeholder handler for the `typename` keyword (not needed yet)."""
    return None
def _GetTemplatedTypes(self):
    """Parse template parameters up to the matching '>'.

    Returns:
      An OrderedDict mapping each parameter name to a
      (type_name, default) pair.  type_name is the token before the
      name for parameters written as `Type variable`, else None;
      default is the token list of a `= default` value, else None.

    Raises:
      AssertionError: if '=' is the last token before the closing '>'.
    """
    result = collections.OrderedDict()
    tokens = list(self._GetMatchingChar('<', '>'))
    len_tokens = len(tokens) - 1    # Ignore trailing '>'.
    i = 0
    while i < len_tokens:
        key = tokens[i].name
        i += 1
        # Skip `typename`/`class` style keywords and separators.
        if keywords.IsKeyword(key) or key == ',':
            continue
        type_name = default = None
        if i < len_tokens:
            i += 1
            if tokens[i-1].name == '=':
                assert i < len_tokens, '%s %s' % (i, tokens)
                default, unused_next_token = self.GetName(tokens[i:])
                i += len(default)
            else:
                if tokens[i-1].name != ',':
                    # We got something like: Type variable.
                    # Re-adjust the key (variable) and type_name (Type).
                    key = tokens[i-1].name
                    type_name = tokens[i-2]

        result[key] = (type_name, default)
    return result
def handle_template(self):
    """Parse a `template <...>` declaration.

    Returns:
      The templated class/struct node, the result of handle_friend,
      the result of self.GetMethod for a templated function, or None
      for anything else (treated as a variable definition).

    Raises:
      AssertionError: if `template` is not followed by '<'.
    """
    token = self._GetNextToken()
    assert token.token_type == tokenize.SYNTAX, token
    assert token.name == '<', token
    templated_types = self._GetTemplatedTypes()
    # TODO(nnorwitz): for now, just ignore the template params.
    token = self._GetNextToken()
    if token.token_type == tokenize.NAME:
        if token.name == 'class':
            return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
        elif token.name == 'struct':
            return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
        elif token.name == 'friend':
            return self.handle_friend()
    self._AddBackToken(token)
    # Peek ahead to the first '(' or ';', then push everything back
    # (terminator included) so the declaration can be re-parsed.
    tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
    tokens.append(last)
    self._AddBackTokens(tokens)
    if last.name == '(':
        return self.GetMethod(FUNCTION_NONE, templated_types)
    # Must be a variable definition.
    return None
def handle_true(self):
    """Handler for the `true` keyword; there is nothing to do."""
    return None
def handle_false(self):
    """Handler for the `false` keyword; there is nothing to do."""
    return None
def handle_asm(self):
    """Placeholder handler for the `asm` keyword (not needed yet)."""
    return None
def handle_class(self):
    """Parse a `class` declaration with private default visibility.

    Returns:
      Whatever self._GetClass returns for a non-templated Class.
    """
    class_node = self._GetClass(Class, VISIBILITY_PRIVATE, None)
    return class_node
def _GetBases(self):
    """Parse the base-class list that follows ':' in a class header.

    Returns:
      (bases, token): the list of converted base types and the '{'
      token that ended the list.

    Raises:
      AssertionError: on an unexpected token in the base list.
    """
    # Get base classes.
    bases = []
    while True:
        token = self._GetNextToken()
        assert token.token_type == tokenize.NAME, token
        # TODO(nnorwitz): store kind of inheritance...maybe.
        if token.name not in ('public', 'protected', 'private'):
            # If inheritance type is not specified, it is private.
            # Just put the token back so we can form a name.
            # TODO(nnorwitz): it would be good to warn about this.
            self._AddBackToken(token)
        else:
            # Check for virtual inheritance.
            token = self._GetNextToken()
            if token.name != 'virtual':
                self._AddBackToken(token)
            else:
                # TODO(nnorwitz): store that we got virtual for this base.
                pass
        base, next_token = self.GetName()
        bases_ast = self.converter.ToType(base)
        assert len(bases_ast) == 1, bases_ast
        bases.append(bases_ast[0])
        assert next_token.token_type == tokenize.SYNTAX, next_token
        if next_token.name == '{':
            token = next_token
            break
        # Support multiple inheritance.
        assert next_token.name == ',', next_token
    return bases, token
def _GetClass(self, class_type, visibility, templated_types):
    """Parse a class/struct declaration, definition or inline use.

    Args:
      class_type: node constructor (e.g. Class or Struct) called as
        class_type(start, end, name, bases, templated_types, body,
        namespace_stack).
      visibility: default member visibility passed to the nested
        AstBuilder that parses the body.
      templated_types: template parameters of the class, or None.

    Returns:
      A class_type node for forward declarations and definitions, the
      result of self._CreateVariable when the type is used to declare a
      variable, or the result of self.GetMethod for a function that
      returns a pointer/reference to the class.

    Raises:
      AssertionError: on an unexpected token.
    """
    class_name = None
    class_token = self._GetNextToken()
    if class_token.token_type != tokenize.NAME:
        assert class_token.token_type == tokenize.SYNTAX, class_token
        token = class_token
    else:
        # Skip any macro (e.g. storage class specifiers) after the
        # 'class' keyword.
        next_token = self._GetNextToken()
        if next_token.token_type == tokenize.NAME:
            self._AddBackToken(next_token)
        else:
            self._AddBackTokens([class_token, next_token])
        name_tokens, token = self.GetName()
        class_name = ''.join([t.name for t in name_tokens])
    bases = None
    if token.token_type == tokenize.SYNTAX:
        if token.name == ';':
            # Forward declaration.
            return class_type(class_token.start, class_token.end,
                              class_name, None, templated_types, None,
                              self.namespace_stack)
        if token.name in '*&':
            # Inline forward declaration.  Could be method or data.
            name_token = self._GetNextToken()
            next_token = self._GetNextToken()
            if next_token.name == ';':
                # Handle data
                modifiers = ['class']
                return self._CreateVariable(class_token, name_token.name,
                                            class_name,
                                            modifiers, token.name, None)
            else:
                # Assume this is a method.
                tokens = (class_token, token, name_token, next_token)
                self._AddBackTokens(tokens)
                return self.GetMethod(FUNCTION_NONE, None)
        if token.name == ':':
            bases, token = self._GetBases()

    body = None
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        # Parse the class body with a nested builder scoped to it.
        ast = AstBuilder(self.GetScope(), self.filename, class_name,
                         visibility, self.namespace_stack)
        body = list(ast.Generate())

        if not self._handling_typedef:
            token = self._GetNextToken()
            if token.token_type != tokenize.NAME:
                assert token.token_type == tokenize.SYNTAX, token
                assert token.name == ';', token
            else:
                # A name after the closing brace declares a variable of
                # the class that was just defined.
                new_class = class_type(class_token.start, class_token.end,
                                       class_name, bases, None,
                                       body, self.namespace_stack)

                modifiers = []
                return self._CreateVariable(class_token,
                                            token.name, new_class,
                                            modifiers, token.name, None)
    else:
        if not self._handling_typedef:
            self.HandleError('non-typedef token', token)
        self._AddBackToken(token)

    return class_type(class_token.start, class_token.end, class_name,
                      bases, templated_types, body, self.namespace_stack)
def handle_namespace(self):
    """Enter a namespace and queue its body for parsing.

    Pushes the namespace name (None when anonymous) on
    self.namespace_stack and pushes the body tokens back, followed by
    an internal token that marks where the namespace ends.

    Returns:
      None.

    Raises:
      AssertionError: on an unexpected token after the namespace name.
    """
    # Support anonymous namespaces.
    name = None
    name_tokens, token = self.GetName()
    if name_tokens:
        name = ''.join([t.name for t in name_tokens])
    self.namespace_stack.append(name)
    assert token.token_type == tokenize.SYNTAX, token
    # Create an internal token that denotes when the namespace is complete.
    # NOTE(review): the position arguments were cut off in this copy;
    # (None, None) is assumed -- confirm.
    internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                    None, None)
    internal_token.whence = token.whence
    if token.name == '=':
        # TODO(nnorwitz): handle aliasing namespaces.
        name, next_token = self.GetName()
        assert next_token.name == ';', next_token
        self._AddBackToken(internal_token)
    else:
        assert token.name == '{', token
        tokens = list(self.GetScope())
        # Replace the trailing } with the internal namespace pop token.
        tokens[-1] = internal_token
        # Handle namespace with nothing in it.
        self._AddBackTokens(tokens)
    return None
def handle_using(self):
    """Parse a `using` declaration up to its terminating ';'.

    Returns:
      A Using node positioned at the first token of the declaration.

    Raises:
      AssertionError: if no tokens precede the ';'.
    """
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    # Guard the tokens[0] accesses below.
    assert tokens
    return Using(tokens[0].start, tokens[0].end, tokens)
def handle_explicit(self):
    """Parse the constructor that follows the `explicit` keyword.

    Returns:
      The result of self.GetMethod with the constructor flag.

    Raises:
      AssertionError: if self.in_class is falsy.
    """
    assert self.in_class
    # Nothing much to do: what follows must be a ctor.
    # TODO(nnorwitz): maybe verify the method name == class name.
    ctor = self.GetMethod(FUNCTION_CTOR, None)
    return ctor
def handle_this(self):
    """Handler for the `this` keyword; there is nothing to do."""
    return None
def handle_operator(self):
    """Placeholder handler for the `operator` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    # Pull off the next token(s?) and make that part of the method name.
    pass
def handle_sizeof(self):
    """Placeholder handler for the `sizeof` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_case(self):
    """Placeholder handler for the `case` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_switch(self):
    """Placeholder handler for the `switch` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_default(self):
    """Consume the ':' that follows a `default` label.

    Raises:
      AssertionError: if the next token is not the ':' syntax token.
    """
    colon = self._GetNextToken()
    assert colon.token_type == tokenize.SYNTAX
    assert colon.name == ':'
def handle_if(self):
    """Placeholder handler for the `if` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_else(self):
    """Placeholder handler for the `else` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_return(self):
    """Parse a `return` statement up to its terminating ';'.

    Returns:
      A Return node.  For a bare `return;` it is positioned at
      self.current_token and carries None; otherwise it is positioned
      at the first expression token and carries the expression tokens.
    """
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    if not tokens:
        # Bare `return;`: there is no expression token to take a
        # position from.
        return Return(self.current_token.start, self.current_token.end, None)
    return Return(tokens[0].start, tokens[0].end, tokens)
def handle_goto(self):
    """Parse a `goto` statement.

    Returns:
      A Goto node positioned at, and named after, the label token.

    Raises:
      AssertionError: unless exactly one token precedes the ';'.
    """
    label_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert len(label_tokens) == 1, str(label_tokens)
    label = label_tokens[0]
    return Goto(label.start, label.end, label.name)
def handle_try(self):
    """Placeholder handler for the `try` keyword (not needed yet)."""
    return None
def handle_catch(self):
    """Placeholder handler for the `catch` keyword (not needed yet)."""
    return None
def handle_throw(self):
    """Placeholder handler for the `throw` keyword (not needed yet)."""
    return None
def handle_while(self):
    """Placeholder handler for the `while` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_do(self):
    """Placeholder handler for the `do` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_for(self):
    """Placeholder handler for the `for` keyword.

    NOTE(review): body absent in this copy; assumed to be a no-op like
    handle_typeid -- confirm.
    """
    pass
def handle_break(self):
    """Handle a `break` statement.

    Delegates to self._IgnoreUpTo with the ';' syntax token; presumably
    this discards the rest of the statement -- verify against
    _IgnoreUpTo.
    """
    terminator = ';'
    self._IgnoreUpTo(tokenize.SYNTAX, terminator)
def handle_continue(self):
    """Handle a `continue` statement.

    Delegates to self._IgnoreUpTo with the ';' syntax token; presumably
    this discards the rest of the statement -- verify against
    _IgnoreUpTo.
    """
    terminator = ';'
    self._IgnoreUpTo(tokenize.SYNTAX, terminator)
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    token_stream = tokenize.GetTokens(source)
    return AstBuilder(token_stream, filename)
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)

    NOTE(review): several statements were missing from this copy (the
    None check, the try, the print and the handler bodies); they are
    restored from context -- confirm against the upstream file.
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Best effort: a parse failure in one file ends its listing
        # without aborting the caller.
        pass
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for source_path in filenames:
        PrintIndentifiers(source_path, should_print)
1752 for filename
in argv
[1:]:
1753 source
= utils
.ReadFile(filename
)
1757 print('Processing %s' % filename
)
1758 builder
= BuilderFromSource(source
, filename
)
1760 entire_ast
= filter(None, builder
.Generate())
1761 except KeyboardInterrupt:
1764 # Already printed a warning, print the traceback and continue.
1765 traceback
.print_exc()
1768 for ast
in entire_ast
:
1772 if __name__
== '__main__':