3 # Copyright 2007 Neal Norwitz
4 # Portions Copyright 2007 Google Inc.
6 # Licensed under the Apache License, Version 2.0 (the "License");
7 # you may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
10 # http://www.apache.org/licenses/LICENSE-2.0
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS,
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 # See the License for the specific language governing permissions and
16 # limitations under the License.
18 """Generate an Abstract Syntax Tree (AST) for C++."""
21 # * Tokens should never be exported, need to convert to Nodes
22 # (return types, parameters, etc.)
23 # * Handle static class data for templatized classes
24 # * Handle casts (both C++ and C-style)
25 # * Handle conditions and loops (if/else, switch, for, while/do)
27 # TODO much, much later:
37 import __builtin__
as builtins
42 from cpp
import keywords
43 from cpp
import tokenize
47 if not hasattr(builtins
, 'reversed'):
48 # Support Python 2.3 and earlier.
50 for i
in range(len(seq
)-1, -1, -1):
53 if not hasattr(builtins
, 'next'):
54 # Support Python 2.5 and earlier.
59 VISIBILITY_PUBLIC
, VISIBILITY_PROTECTED
, VISIBILITY_PRIVATE
= range(3)
63 FUNCTION_VIRTUAL
= 0x02
64 FUNCTION_PURE_VIRTUAL
= 0x04
67 FUNCTION_ATTRIBUTE
= 0x20
68 FUNCTION_UNKNOWN_ANNOTATION
= 0x40
70 FUNCTION_OVERRIDE
= 0x100
73 These are currently unused. Should really handle these properly at some point.
75 TYPE_MODIFIER_INLINE = 0x010000
76 TYPE_MODIFIER_EXTERN = 0x020000
77 TYPE_MODIFIER_STATIC = 0x040000
78 TYPE_MODIFIER_CONST = 0x080000
79 TYPE_MODIFIER_REGISTER = 0x100000
80 TYPE_MODIFIER_VOLATILE = 0x200000
81 TYPE_MODIFIER_MUTABLE = 0x400000
84 'inline': TYPE_MODIFIER_INLINE,
85 'extern': TYPE_MODIFIER_EXTERN,
86 'static': TYPE_MODIFIER_STATIC,
87 'const': TYPE_MODIFIER_CONST,
88 'register': TYPE_MODIFIER_REGISTER,
89 'volatile': TYPE_MODIFIER_VOLATILE,
90 'mutable': TYPE_MODIFIER_MUTABLE,
94 _INTERNAL_TOKEN
= 'internal'
95 _NAMESPACE_POP
= 'ns-pop'
98 # TODO(nnorwitz): use this as a singleton for templated_types, etc
99 # where we don't want to create a new empty dict each time. It is also const.
100 class _NullDict(object):
101 __contains__
= lambda self
: False
102 keys
= values
= items
= iterkeys
= itervalues
= iteritems
= lambda self
: ()
105 # TODO(nnorwitz): move AST nodes into a separate module.
109 def __init__(self
, start
, end
):
113 def IsDeclaration(self
):
114 """Returns bool if this node is a declaration."""
117 def IsDefinition(self
):
118 """Returns bool if this node is a definition."""
121 def IsExportable(self
):
122 """Returns bool if this node exportable from a header file."""
125 def Requires(self
, node
):
126 """Does this AST node require the definition of the node passed in?"""
def XXX__str__(self):
    """Debug rendering; deliberately not wired up as __str__ (note the XXX)."""
    class_name = type(self).__name__
    return self._StringHelper(class_name, '')
132 def _StringHelper(self
, name
, suffix
):
134 return '%s(%s)' % (name
, suffix
)
135 return '%s(%d, %d, %s)' % (name
, self
.start
, self
.end
, suffix
)
142 def __init__(self
, start
, end
, name
, definition
):
143 Node
.__init
__(self
, start
, end
)
145 self
.definition
= definition
148 value
= '%s %s' % (self
.name
, self
.definition
)
149 return self
._StringHelper
(self
.__class
__.__name
__, value
)
153 def __init__(self
, start
, end
, filename
, system
):
154 Node
.__init
__(self
, start
, end
)
155 self
.filename
= filename
162 return self
._StringHelper
(self
.__class
__.__name
__, fmt
% self
.filename
)
166 def __init__(self
, start
, end
, label
):
167 Node
.__init
__(self
, start
, end
)
171 return self
._StringHelper
(self
.__class
__.__name
__, str(self
.label
))
175 def __init__(self
, start
, end
, expr
):
176 Node
.__init
__(self
, start
, end
)
179 def Requires(self
, node
):
180 # TODO(nnorwitz): impl.
184 return self
._StringHelper
(self
.__class
__.__name
__, str(self
.expr
))
196 def __init__(self
, start
, end
, expr
, namespace
):
197 Expr
.__init
__(self
, start
, end
, expr
)
198 self
.namespace
= namespace
[:]
202 def __init__(self
, start
, end
, names
):
203 Node
.__init
__(self
, start
, end
)
207 return self
._StringHelper
(self
.__class
__.__name
__, str(self
.names
))
210 class Parameter(Node
):
211 def __init__(self
, start
, end
, name
, parameter_type
, default
):
212 Node
.__init
__(self
, start
, end
)
214 self
.type = parameter_type
215 self
.default
= default
def Requires(self, node):
    """Whether this parameter's type refers to *node* by name."""
    # TODO(nnorwitz): handle namespaces, etc.
    return node.name == self.type.name
222 name
= str(self
.type)
223 suffix
= '%s %s' % (name
, self
.name
)
225 suffix
+= ' = ' + ''.join([d
.name
for d
in self
.default
])
226 return self
._StringHelper
(self
.__class
__.__name
__, suffix
)
229 class _GenericDeclaration(Node
):
230 def __init__(self
, start
, end
, name
, namespace
):
231 Node
.__init
__(self
, start
, end
)
233 self
.namespace
= namespace
[:]
237 if self
.namespace
and self
.namespace
[-1]:
238 prefix
= '::'.join(self
.namespace
) + '::'
239 return prefix
+ self
.name
241 def _TypeStringHelper(self
, suffix
):
243 names
= [n
or '<anonymous>' for n
in self
.namespace
]
244 suffix
+= ' in ' + '::'.join(names
)
245 return self
._StringHelper
(self
.__class
__.__name
__, suffix
)
248 # TODO(nnorwitz): merge with Parameter in some way?
249 class VariableDeclaration(_GenericDeclaration
):
250 def __init__(self
, start
, end
, name
, var_type
, initial_value
, namespace
):
251 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
253 self
.initial_value
= initial_value
def Requires(self, node):
    """Whether this variable's declared type refers to *node* by name."""
    # TODO(nnorwitz): handle namespaces, etc.
    return node.name == self.type.name
260 """Return a string that tries to reconstitute the variable decl."""
261 suffix
= '%s %s' % (self
.type, self
.name
)
262 if self
.initial_value
:
263 suffix
+= ' = ' + self
.initial_value
267 return self
._StringHelper
(self
.__class
__.__name
__, self
.ToString())
270 class Typedef(_GenericDeclaration
):
271 def __init__(self
, start
, end
, name
, alias
, namespace
):
272 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
275 def IsDefinition(self
):
278 def IsExportable(self
):
281 def Requires(self
, node
):
282 # TODO(nnorwitz): handle namespaces, etc.
284 for token
in self
.alias
:
285 if token
is not None and name
== token
.name
:
290 suffix
= '%s, %s' % (self
.name
, self
.alias
)
291 return self
._TypeStringHelper
(suffix
)
294 class _NestedType(_GenericDeclaration
):
295 def __init__(self
, start
, end
, name
, fields
, namespace
):
296 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
299 def IsDefinition(self
):
302 def IsExportable(self
):
306 suffix
= '%s, {%s}' % (self
.name
, self
.fields
)
307 return self
._TypeStringHelper
(suffix
)
310 class Union(_NestedType
):
314 class Enum(_NestedType
):
318 class Class(_GenericDeclaration
):
319 def __init__(self
, start
, end
, name
, bases
, templated_types
, body
, namespace
):
320 _GenericDeclaration
.__init
__(self
, start
, end
, name
, namespace
)
323 self
.templated_types
= templated_types
def IsDeclaration(self):
    """Forward declaration: the class has neither a base list nor a body."""
    no_bases = self.bases is None
    no_body = self.body is None
    return no_bases and no_body
def IsDefinition(self):
    """A class counts as a definition when it isn't a forward declaration."""
    declaration_only = self.IsDeclaration()
    return not declaration_only
def IsExportable(self):
    """Only class definitions (not forward declarations) are exportable."""
    declaration_only = self.IsDeclaration()
    return not declaration_only
334 def Requires(self
, node
):
335 # TODO(nnorwitz): handle namespaces, etc.
337 for token_list
in self
.bases
:
338 # TODO(nnorwitz): bases are tokens, do name comparision.
339 for token
in token_list
:
340 if token
.name
== node
.name
:
342 # TODO(nnorwitz): search in body too.
347 if self
.templated_types
:
348 name
+= '<%s>' % self
.templated_types
349 suffix
= '%s, %s, %s' % (name
, self
.bases
, self
.body
)
350 return self
._TypeStringHelper
(suffix
)
357 class Function(_GenericDeclaration
):
def __init__(self, start, end, name, return_type, parameters,
             modifiers, templated_types, body, namespace):
    """Function node: converts raw return-type/parameter token seqs to nodes.

    Args mirror _GenericDeclaration plus:
      return_type: [Token] raw return type tokens (converted via TypeConverter)
      parameters: [Token] raw parameter tokens (converted via TypeConverter)
      modifiers: int bitmask of FUNCTION_* flags
      templated_types: template type info, if any
      body: token list of the definition body, or None for a declaration
    """
    _GenericDeclaration.__init__(self, start, end, name, namespace)
    converter = TypeConverter(namespace)
    self.return_type = converter.CreateReturnType(return_type)
    self.parameters = converter.ToParameters(parameters)
    self.modifiers = modifiers
    # Restored: body was never stored, yet IsDeclaration/IsDefinition
    # read self.body (None means declaration-only).
    self.body = body
    self.templated_types = templated_types
def IsDeclaration(self):
    """A function without a body is a declaration (prototype)."""
    return self.body is None
def IsDefinition(self):
    """A function with a body is a definition."""
    return self.body is not None
374 def IsExportable(self
):
375 if self
.return_type
and 'static' in self
.return_type
.modifiers
:
377 return None not in self
.namespace
379 def Requires(self
, node
):
381 # TODO(nnorwitz): parameters are tokens, do name comparision.
382 for p
in self
.parameters
:
383 if p
.name
== node
.name
:
385 # TODO(nnorwitz): search in body too.
389 # TODO(nnorwitz): add templated_types.
390 suffix
= ('%s %s(%s), 0x%02x, %s' %
391 (self
.return_type
, self
.name
, self
.parameters
,
392 self
.modifiers
, self
.body
))
393 return self
._TypeStringHelper
(suffix
)
396 class Method(Function
):
def __init__(self, start, end, name, in_class, return_type, parameters,
             modifiers, templated_types, body, namespace):
    """Like Function, but also records which class the method belongs to."""
    Function.__init__(self, start, end, name, return_type, parameters,
                      modifiers, templated_types, body, namespace)
    # TODO(nnorwitz): in_class could also be a namespace which can
    # mess up finding functions properly.
    self.in_class = in_class
406 class Type(_GenericDeclaration
):
407 """Type used for any variable (eg class, primitive, struct, etc)."""
409 def __init__(self
, start
, end
, name
, templated_types
, modifiers
,
410 reference
, pointer
, array
):
413 name: str name of main type
414 templated_types: [Class (Type?)] template type info between <>
415 modifiers: [str] type modifiers (keywords) eg, const, mutable, etc.
416 reference, pointer, array: bools
418 _GenericDeclaration
.__init
__(self
, start
, end
, name
, [])
419 self
.templated_types
= templated_types
420 if not name
and modifiers
:
421 self
.name
= modifiers
.pop()
422 self
.modifiers
= modifiers
423 self
.reference
= reference
424 self
.pointer
= pointer
430 prefix
= ' '.join(self
.modifiers
) + ' '
431 name
= str(self
.name
)
432 if self
.templated_types
:
433 name
+= '<%s>' % self
.templated_types
434 suffix
= prefix
+ name
441 return self
._TypeStringHelper
(suffix
)
443 # By definition, Is* are always False. A Type can only exist in
444 # some sort of variable declaration, parameter, or return value.
445 def IsDeclaration(self
):
448 def IsDefinition(self
):
451 def IsExportable(self
):
455 class TypeConverter(object):
def __init__(self, namespace_stack):
    """Keep a reference to the namespace stack used when converting tokens."""
    self.namespace_stack = namespace_stack
460 def _GetTemplateEnd(self
, tokens
, start
):
466 if token
.name
== '<':
468 elif token
.name
== '>':
472 return tokens
[start
:end
-1], end
474 def ToType(self
, tokens
):
475 """Convert [Token,...] to [Class(...), ] useful for base classes.
476 For example, code like class Foo : public Bar<x, y> { ... };
477 the "Bar<x, y>" portion gets converted to an AST.
484 reference
= pointer
= array
= False
486 def AddType(templated_types
):
487 # Partition tokens into name and modifier tokens.
490 for t
in name_tokens
:
491 if keywords
.IsKeyword(t
.name
):
492 modifiers
.append(t
.name
)
495 name
= ''.join(names
)
497 result
.append(Type(name_tokens
[0].start
, name_tokens
[-1].end
,
498 name
, templated_types
, modifiers
,
499 reference
, pointer
, array
))
506 if token
.name
== '<':
507 new_tokens
, new_end
= self
._GetTemplateEnd
(tokens
, i
+1)
508 AddType(self
.ToType(new_tokens
))
509 # If there is a comma after the template, we need to consume
510 # that here otherwise it becomes part of the name.
512 reference
= pointer
= array
= False
513 elif token
.name
== ',':
515 reference
= pointer
= array
= False
516 elif token
.name
== '*':
518 elif token
.name
== '&':
520 elif token
.name
== '[':
522 elif token
.name
== ']':
525 name_tokens
.append(token
)
529 # No '<' in the tokens, just a simple name and no template.
533 def DeclarationToParts(self
, parts
, needs_name_removed
):
536 if needs_name_removed
:
537 # Handle default (initial) values properly.
538 for i
, t
in enumerate(parts
):
540 default
= parts
[i
+1:]
541 name
= parts
[i
-1].name
542 if name
== ']' and parts
[i
-2].name
== '[':
543 name
= parts
[i
-3].name
548 if parts
[-1].token_type
== tokenize
.NAME
:
549 name
= parts
.pop().name
551 # TODO(nnorwitz): this is a hack that happens for code like
552 # Register(Foo<T>); where it thinks this is a function call
553 # but it's actually a declaration.
563 if keywords
.IsKeyword(p
.name
):
564 modifiers
.append(p
.name
)
566 templated_tokens
, new_end
= self
._GetTemplateEnd
(parts
, i
+1)
567 templated_types
= self
.ToType(templated_tokens
)
569 # Don't add a spurious :: to data members being initialized.
571 if next_index
< end
and parts
[next_index
].name
== '::':
573 elif p
.name
in ('[', ']', '='):
574 # These are handled elsewhere.
575 other_tokens
.append(p
)
576 elif p
.name
not in ('*', '&', '>'):
577 # Ensure that names have a space between them.
578 if (type_name
and type_name
[-1].token_type
== tokenize
.NAME
and
579 p
.token_type
== tokenize
.NAME
):
580 type_name
.append(tokenize
.Token(tokenize
.SYNTAX
, ' ', 0, 0))
583 other_tokens
.append(p
)
585 type_name
= ''.join([t
.name
for t
in type_name
])
586 return name
, type_name
, templated_types
, modifiers
, default
, other_tokens
588 def ToParameters(self
, tokens
):
593 name
= type_name
= ''
595 pointer
= reference
= array
= False
599 def AddParameter(end
):
601 del default
[0] # Remove flag.
602 parts
= self
.DeclarationToParts(type_modifiers
, True)
603 (name
, type_name
, templated_types
, modifiers
,
604 unused_default
, unused_other_tokens
) = parts
605 parameter_type
= Type(first_token
.start
, first_token
.end
,
606 type_name
, templated_types
, modifiers
,
607 reference
, pointer
, array
)
608 p
= Parameter(first_token
.start
, end
, name
,
609 parameter_type
, default
)
618 # Check for braces before templates, as we can have unmatched '<>'
619 # inside default arguments.
625 type_modifiers
.append(s
)
632 if template_count
> 0:
633 type_modifiers
.append(s
)
637 AddParameter(s
.start
)
638 name
= type_name
= ''
640 pointer
= reference
= array
= False
650 pass # Just don't add to type_modifiers.
652 # Got a default value. Add any value (None) as a flag.
657 type_modifiers
.append(s
)
658 AddParameter(tokens
[-1].end
)
def CreateReturnType(self, return_type_seq):
    """Convert a return-type token sequence into a Type node.

    Returns None when the sequence is empty.
    """
    if not return_type_seq:
        return None
    start = return_type_seq[0].start
    end = return_type_seq[-1].end
    _, name, templated_types, modifiers, default, other_tokens = \
        self.DeclarationToParts(return_type_seq, False)
    names = [n.name for n in other_tokens]
    reference = '&' in names
    pointer = '*' in names
    # Restored: `array` is passed to Type() below but was never bound,
    # mirroring the reference/pointer checks above.
    array = '[' in names
    return Type(start, end, name, templated_types, modifiers,
                reference, pointer, array)
675 def GetTemplateIndices(self
, names
):
676 # names is a list of strings.
677 start
= names
.index('<')
680 if names
[end
] == '>':
685 class AstBuilder(object):
686 def __init__(self
, token_stream
, filename
, in_class
='', visibility
=None,
688 self
.tokens
= token_stream
689 self
.filename
= filename
690 # TODO(nnorwitz): use a better data structure (deque) for the queue.
691 # Switching directions of the "queue" improved perf by about 25%.
692 # Using a deque should be even better since we access from both sides.
693 self
.token_queue
= []
694 self
.namespace_stack
= namespace_stack
[:]
695 self
.in_class
= in_class
697 self
.in_class_name_only
= None
699 self
.in_class_name_only
= in_class
.split('::')[-1]
700 self
.visibility
= visibility
701 self
.in_function
= False
702 self
.current_token
= None
703 # Keep the state whether we are currently handling a typedef or not.
704 self
._handling
_typedef
= False
706 self
.converter
= TypeConverter(self
.namespace_stack
)
def HandleError(self, msg, token):
    """Report a parse problem on stderr with recent token-queue context."""
    # Show (up to) the last 20 queued tokens, reversed into consumption order.
    recent = list(reversed(self.token_queue[-20:]))
    sys.stderr.write('Got %s in %s @ %s %s\n' %
                     (msg, self.filename, token, recent))
715 token
= self
._GetNextToken
()
719 # Get the next token.
720 self
.current_token
= token
722 # Dispatch on the next token type.
723 if token
.token_type
== _INTERNAL_TOKEN
:
724 if token
.name
== _NAMESPACE_POP
:
725 self
.namespace_stack
.pop()
729 result
= self
._GenerateOne
(token
)
730 if result
is not None:
733 self
.HandleError('exception', token
)
def _CreateVariable(self, pos_token, name, type_name, type_modifiers,
                    ref_pointer_name_seq, templated_types, value=None):
    """Build a VariableDeclaration node anchored at pos_token's span."""
    seq = ref_pointer_name_seq
    # The presence of these syntax tokens determines the type's flavor.
    is_ref = '&' in seq
    is_ptr = '*' in seq
    is_array = '[' in seq
    var_type = Type(pos_token.start, pos_token.end, type_name,
                    templated_types, type_modifiers,
                    is_ref, is_ptr, is_array)
    return VariableDeclaration(pos_token.start, pos_token.end,
                               name, var_type, value, self.namespace_stack)
747 def _GenerateOne(self
, token
):
748 if token
.token_type
== tokenize
.NAME
:
749 if (keywords
.IsKeyword(token
.name
) and
750 not keywords
.IsBuiltinType(token
.name
)):
751 if token
.name
== 'enum':
752 # Pop the next token and only put it back if it's not
753 # 'class'. This allows us to support the two-token
754 # 'enum class' keyword as if it were simply 'enum'.
755 next
= self
._GetNextToken
()
756 if next
.name
!= 'class':
757 self
._AddBackToken
(next
)
759 method
= getattr(self
, 'handle_' + token
.name
)
761 elif token
.name
== self
.in_class_name_only
:
762 # The token name is the same as the class, must be a ctor if
763 # there is a paren. Otherwise, it's the return type.
764 # Peek ahead to get the next token to figure out which.
765 next
= self
._GetNextToken
()
766 self
._AddBackToken
(next
)
767 if next
.token_type
== tokenize
.SYNTAX
and next
.name
== '(':
768 return self
._GetMethod
([token
], FUNCTION_CTOR
, None, True)
769 # Fall through--handle like any other method.
771 # Handle data or function declaration/definition.
772 syntax
= tokenize
.SYNTAX
773 temp_tokens
, last_token
= \
774 self
._GetVarTokensUpToIgnoringTemplates
(syntax
,
776 temp_tokens
.insert(0, token
)
777 if last_token
.name
== '(':
778 # If there is an assignment before the paren,
779 # this is an expression, not a method.
780 expr
= bool([e
for e
in temp_tokens
if e
.name
== '='])
782 new_temp
= self
._GetTokensUpTo
(tokenize
.SYNTAX
, ';')
783 temp_tokens
.append(last_token
)
784 temp_tokens
.extend(new_temp
)
785 last_token
= tokenize
.Token(tokenize
.SYNTAX
, ';', 0, 0)
787 if last_token
.name
== '[':
788 # Handle array, this isn't a method, unless it's an operator.
789 # TODO(nnorwitz): keep the size somewhere.
790 # unused_size = self._GetTokensUpTo(tokenize.SYNTAX, ']')
791 temp_tokens
.append(last_token
)
792 if temp_tokens
[-2].name
== 'operator':
793 temp_tokens
.append(self
._GetNextToken
())
795 temp_tokens2
, last_token
= \
796 self
._GetVarTokensUpTo
(tokenize
.SYNTAX
, ';')
797 temp_tokens
.extend(temp_tokens2
)
799 if last_token
.name
== ';':
800 # Handle data, this isn't a method.
801 parts
= self
.converter
.DeclarationToParts(temp_tokens
, True)
802 (name
, type_name
, templated_types
, modifiers
, default
,
803 unused_other_tokens
) = parts
806 names
= [t
.name
for t
in temp_tokens
]
808 start
, end
= self
.converter
.GetTemplateIndices(names
)
809 names
= names
[:start
] + names
[end
:]
810 default
= ''.join([t
.name
for t
in default
])
811 return self
._CreateVariable
(t0
, name
, type_name
, modifiers
,
812 names
, templated_types
, default
)
813 if last_token
.name
== '{':
814 self
._AddBackTokens
(temp_tokens
[1:])
815 self
._AddBackToken
(last_token
)
816 method_name
= temp_tokens
[0].name
817 method
= getattr(self
, 'handle_' + method_name
, None)
819 # Must be declaring a variable.
820 # TODO(nnorwitz): handle the declaration.
823 return self
._GetMethod
(temp_tokens
, 0, None, False)
824 elif token
.token_type
== tokenize
.SYNTAX
:
825 if token
.name
== '~' and self
.in_class
:
826 # Must be a dtor (probably not in method body).
827 token
= self
._GetNextToken
()
828 # self.in_class can contain A::Name, but the dtor will only
829 # be Name. Make sure to compare against the right value.
830 if (token
.token_type
== tokenize
.NAME
and
831 token
.name
== self
.in_class_name_only
):
832 return self
._GetMethod
([token
], FUNCTION_DTOR
, None, True)
833 # TODO(nnorwitz): handle a lot more syntax.
834 elif token
.token_type
== tokenize
.PREPROCESSOR
:
835 # TODO(nnorwitz): handle more preprocessor directives.
836 # token starts with a #, so remove it and strip whitespace.
837 name
= token
.name
[1:].lstrip()
838 if name
.startswith('include'):
840 name
= name
[7:].strip()
842 # Handle #include \<newline> "header-on-second-line.h".
843 if name
.startswith('\\'):
844 name
= name
[1:].strip()
845 assert name
[0] in '<"', token
846 assert name
[-1] in '>"', token
847 system
= name
[0] == '<'
848 filename
= name
[1:-1]
849 return Include(token
.start
, token
.end
, filename
, system
)
850 if name
.startswith('define'):
852 name
= name
[6:].strip()
855 for i
, c
in enumerate(name
):
857 value
= name
[i
:].lstrip()
860 return Define(token
.start
, token
.end
, name
, value
)
861 if name
.startswith('if') and name
[2:3].isspace():
862 condition
= name
[3:].strip()
863 if condition
.startswith('0') or condition
.startswith('(0)'):
864 self
._SkipIf
0Blocks
()
def _GetTokensUpTo(self, expected_token_type, expected_token):
    """Like _GetVarTokensUpTo, but discard the terminating token."""
    tokens, _ = self._GetVarTokensUpTo(expected_token_type, expected_token)
    return tokens
870 def _GetVarTokensUpTo(self
, expected_token_type
, *expected_tokens
):
871 last_token
= self
._GetNextToken
()
873 while (last_token
.token_type
!= expected_token_type
or
874 last_token
.name
not in expected_tokens
):
875 tokens
.append(last_token
)
876 last_token
= self
._GetNextToken
()
877 return tokens
, last_token
879 # Same as _GetVarTokensUpTo, but skips over '<...>' which could contain an
881 def _GetVarTokensUpToIgnoringTemplates(self
, expected_token_type
,
883 last_token
= self
._GetNextToken
()
886 while (nesting
> 0 or
887 last_token
.token_type
!= expected_token_type
or
888 last_token
.name
not in expected_tokens
):
889 tokens
.append(last_token
)
890 last_token
= self
._GetNextToken
()
891 if last_token
.name
== '<':
893 elif last_token
.name
== '>':
895 return tokens
, last_token
897 # TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
# TODO(nnorwitz): remove _IgnoreUpTo() it shouldn't be necesary.
def _IgnoreUpTo(self, token_type, token):
    """Consume and discard all tokens up to the given terminator."""
    self._GetTokensUpTo(token_type, token)
901 def _SkipIf0Blocks(self
):
904 token
= self
._GetNextToken
()
905 if token
.token_type
!= tokenize
.PREPROCESSOR
:
908 name
= token
.name
[1:].lstrip()
909 if name
.startswith('endif'):
913 elif name
.startswith('if'):
916 def _GetMatchingChar(self
, open_paren
, close_paren
, GetNextToken
=None):
917 if GetNextToken
is None:
918 GetNextToken
= self
._GetNextToken
919 # Assumes the current token is open_paren and we will consume
920 # and return up to the close_paren.
922 token
= GetNextToken()
924 if token
.token_type
== tokenize
.SYNTAX
:
925 if token
.name
== open_paren
:
927 elif token
.name
== close_paren
:
932 token
= GetNextToken()
def _GetParameters(self):
    """Return the tokens between the current '(' and its matching ')'."""
    return self._GetMatchingChar('(', ')')
939 return self
._GetMatchingChar
('{', '}')
941 def _GetNextToken(self
):
943 return self
.token_queue
.pop()
945 return next(self
.tokens
)
946 except StopIteration:
def _AddBackToken(self, token):
    """Return *token* to the pending-token queue.

    A token freshly read from the stream is re-tagged as a queue token
    and inserted at index 0; a token that already came from the queue
    is appended.  (_GetNextToken pops from the end of the queue.)
    """
    if token.whence == tokenize.WHENCE_STREAM:
        token.whence = tokenize.WHENCE_QUEUE
        self.token_queue.insert(0, token)
    else:
        # Restored branch: without the else, a stream token would be
        # inserted above and then appended again here.
        assert token.whence == tokenize.WHENCE_QUEUE, token
        self.token_queue.append(token)
def _AddBackTokens(self, tokens):
    """Return a sequence of tokens to the queue, preserving their order.

    Mirrors _AddBackToken: stream tokens are re-tagged and prepended,
    queue tokens are appended (the queue is popped from the end, hence
    the reversed() calls).
    """
    if tokens:
        if tokens[-1].whence == tokenize.WHENCE_STREAM:
            # Restored loop header: every stream token must be re-tagged
            # before being queued.
            for token in tokens:
                token.whence = tokenize.WHENCE_QUEUE
            self.token_queue[:0] = reversed(tokens)
        else:
            # Restored branch, mirroring _AddBackToken.
            assert tokens[-1].whence == tokenize.WHENCE_QUEUE, tokens
            self.token_queue.extend(reversed(tokens))
967 def GetName(self
, seq
=None):
968 """Returns ([tokens], next_token_info)."""
969 GetNextToken
= self
._GetNextToken
972 GetNextToken
= lambda: next(it
)
973 next_token
= GetNextToken()
975 last_token_was_name
= False
976 while (next_token
.token_type
== tokenize
.NAME
or
977 (next_token
.token_type
== tokenize
.SYNTAX
and
978 next_token
.name
in ('::', '<'))):
979 # Two NAMEs in a row means the identifier should terminate.
980 # It's probably some sort of variable declaration.
981 if last_token_was_name
and next_token
.token_type
== tokenize
.NAME
:
983 last_token_was_name
= next_token
.token_type
== tokenize
.NAME
984 tokens
.append(next_token
)
985 # Handle templated names.
986 if next_token
.name
== '<':
987 tokens
.extend(self
._GetMatchingChar
('<', '>', GetNextToken
))
988 last_token_was_name
= True
989 next_token
= GetNextToken()
990 return tokens
, next_token
992 def GetMethod(self
, modifiers
, templated_types
):
993 return_type_and_name
= self
._GetTokensUpTo
(tokenize
.SYNTAX
, '(')
994 assert len(return_type_and_name
) >= 1
995 return self
._GetMethod
(return_type_and_name
, modifiers
, templated_types
,
998 def _GetMethod(self
, return_type_and_name
, modifiers
, templated_types
,
1000 template_portion
= None
1002 token
= self
._GetNextToken
()
1003 assert token
.token_type
== tokenize
.SYNTAX
, token
1004 if token
.name
== '<':
1005 # Handle templatized dtors.
1006 template_portion
= [token
]
1007 template_portion
.extend(self
._GetMatchingChar
('<', '>'))
1008 token
= self
._GetNextToken
()
1009 assert token
.token_type
== tokenize
.SYNTAX
, token
1010 assert token
.name
== '(', token
1012 name
= return_type_and_name
.pop()
1013 # Handle templatized ctors.
1014 if name
.name
== '>':
1016 while return_type_and_name
[index
].name
!= '<':
1018 template_portion
= return_type_and_name
[index
:] + [name
]
1019 del return_type_and_name
[index
:]
1020 name
= return_type_and_name
.pop()
1021 elif name
.name
== ']':
1022 rt
= return_type_and_name
1023 assert rt
[-1].name
== '[', return_type_and_name
1024 assert rt
[-2].name
== 'operator', return_type_and_name
1025 name_seq
= return_type_and_name
[-2:]
1026 del return_type_and_name
[-2:]
1027 name
= tokenize
.Token(tokenize
.NAME
, 'operator[]',
1028 name_seq
[0].start
, name
.end
)
1029 # Get the open paren so _GetParameters() below works.
1030 unused_open_paren
= self
._GetNextToken
()
1032 # TODO(nnorwitz): store template_portion.
1033 return_type
= return_type_and_name
1036 indices
= return_type
[0]
1038 # Force ctor for templatized ctors.
1039 if name
.name
== self
.in_class
and not modifiers
:
1040 modifiers |
= FUNCTION_CTOR
1041 parameters
= list(self
._GetParameters
())
1042 del parameters
[-1] # Remove trailing ')'.
1044 # Handling operator() is especially weird.
1045 if name
.name
== 'operator' and not parameters
:
1046 token
= self
._GetNextToken
()
1047 assert token
.name
== '(', token
1048 parameters
= list(self
._GetParameters
())
1049 del parameters
[-1] # Remove trailing ')'.
1051 token
= self
._GetNextToken
()
1052 while token
.token_type
== tokenize
.NAME
:
1053 modifier_token
= token
1054 token
= self
._GetNextToken
()
1055 if modifier_token
.name
== 'const':
1056 modifiers |
= FUNCTION_CONST
1057 elif modifier_token
.name
== '__attribute__':
1058 # TODO(nnorwitz): handle more __attribute__ details.
1059 modifiers |
= FUNCTION_ATTRIBUTE
1060 assert token
.name
== '(', token
1061 # Consume everything between the (parens).
1062 unused_tokens
= list(self
._GetMatchingChar
('(', ')'))
1063 token
= self
._GetNextToken
()
1064 elif modifier_token
.name
== 'throw':
1065 modifiers |
= FUNCTION_THROW
1066 assert token
.name
== '(', token
1067 # Consume everything between the (parens).
1068 unused_tokens
= list(self
._GetMatchingChar
('(', ')'))
1069 token
= self
._GetNextToken
()
1070 elif modifier_token
.name
== 'override':
1071 modifiers |
= FUNCTION_OVERRIDE
1072 elif modifier_token
.name
== modifier_token
.name
.upper():
1073 # HACK(nnorwitz): assume that all upper-case names
1074 # are some macro we aren't expanding.
1075 modifiers |
= FUNCTION_UNKNOWN_ANNOTATION
1077 self
.HandleError('unexpected token', modifier_token
)
1079 assert token
.token_type
== tokenize
.SYNTAX
, token
1080 # Handle ctor initializers.
1081 if token
.name
== ':':
1082 # TODO(nnorwitz): anything else to handle for initializer list?
1083 while token
.name
!= ';' and token
.name
!= '{':
1084 token
= self
._GetNextToken
()
1086 # Handle pointer to functions that are really data but look
1087 # like method declarations.
1088 if token
.name
== '(':
1089 if parameters
[0].name
== '*':
1090 # name contains the return type.
1091 name
= parameters
.pop()
1092 # parameters contains the name of the data.
1093 modifiers
= [p
.name
for p
in parameters
]
1094 # Already at the ( to open the parameter list.
1095 function_parameters
= list(self
._GetMatchingChar
('(', ')'))
1096 del function_parameters
[-1] # Remove trailing ')'.
1097 # TODO(nnorwitz): store the function_parameters.
1098 token
= self
._GetNextToken
()
1099 assert token
.token_type
== tokenize
.SYNTAX
, token
1100 assert token
.name
== ';', token
1101 return self
._CreateVariable
(indices
, name
.name
, indices
.name
,
1102 modifiers
, '', None)
1103 # At this point, we got something like:
1104 # return_type (type::*name_)(params);
1105 # This is a data member called name_ that is a function pointer.
1106 # With this code: void (sq_type::*field_)(string&);
1107 # We get: name=void return_type=[] parameters=sq_type ... field_
1108 # TODO(nnorwitz): is return_type always empty?
1109 # TODO(nnorwitz): this isn't even close to being correct.
1110 # Just put in something so we don't crash and can move on.
1111 real_name
= parameters
[-1]
1112 modifiers
= [p
.name
for p
in self
._GetParameters
()]
1113 del modifiers
[-1] # Remove trailing ')'.
1114 return self
._CreateVariable
(indices
, real_name
.name
, indices
.name
,
1115 modifiers
, '', None)
1117 if token
.name
== '{':
1118 body
= list(self
.GetScope())
1119 del body
[-1] # Remove trailing '}'.
1122 if token
.name
== '=':
1123 token
= self
._GetNextToken
()
1125 if token
.name
== 'default' or token
.name
== 'delete':
1126 # Ignore explicitly defaulted and deleted special members
1128 token
= self
._GetNextToken
()
1130 # Handle pure-virtual declarations.
1131 assert token
.token_type
== tokenize
.CONSTANT
, token
1132 assert token
.name
== '0', token
1133 modifiers |
= FUNCTION_PURE_VIRTUAL
1134 token
= self
._GetNextToken
()
1136 if token
.name
== '[':
1137 # TODO(nnorwitz): store tokens and improve parsing.
1138 # template <typename T, size_t N> char (&ASH(T (&seq)[N]))[N];
1139 tokens
= list(self
._GetMatchingChar
('[', ']'))
1140 token
= self
._GetNextToken
()
1142 assert token
.name
== ';', (token
, return_type_and_name
, parameters
)
1144 # Looks like we got a method, not a function.
1145 if len(return_type
) > 2 and return_type
[-1].name
== '::':
1146 return_type
, in_class
= \
1147 self
._GetReturnTypeAndClassName
(return_type
)
1148 return Method(indices
.start
, indices
.end
, name
.name
, in_class
,
1149 return_type
, parameters
, modifiers
, templated_types
,
1150 body
, self
.namespace_stack
)
1151 return Function(indices
.start
, indices
.end
, name
.name
, return_type
,
1152 parameters
, modifiers
, templated_types
, body
,
1153 self
.namespace_stack
)
def _GetReturnTypeAndClassName(self, token_seq):
    """Split a method declaration's tokens into (return type, class name).

    Args:
      token_seq: [tokenize.Token, ...] covering the return type and the
        ::-qualified name, e.g. the tokens of 'Type Class::Method'.

    Returns:
      (return_type, class_name): return_type is a flat [Token, ...] and
      class_name is the [Token, ...] of the last name in the sequence.
    """
    # Splitting the return type from the class name in a method
    # can be tricky.  For example, Return::Type::Is::Hard::To::Find().
    # Where is the return type and where is the class name?
    # The heuristic used is to pull the last name as the class name.
    # This includes all the templated type info.
    # TODO(nnorwitz): if there is only One name like in the
    # example above, punt and assume the last bit is the class name.

    # Ignore a :: prefix, if exists so we can find the first real name.
    i = 0
    if token_seq[0].name == '::':
        i = 1
    # Ignore a :: suffix, if exists.
    end = len(token_seq) - 1
    if token_seq[end-1].name == '::':
        end -= 1

    # Make a copy of the sequence so we can append a sentinel
    # value. This is required for GetName will has to have some
    # terminating condition beyond the last name.
    seq_copy = token_seq[i:end]
    seq_copy.append(tokenize.Token(tokenize.SYNTAX, '', 0, 0))
    names = []
    while i < end:
        # Iterate through the sequence parsing out each name.
        new_name, next = self.GetName(seq_copy[i:])
        assert new_name, 'Got empty new_name, next=%s' % next
        # We got a pointer or ref.  Add it to the name.
        if next and next.token_type == tokenize.SYNTAX:
            new_name.append(next)
        names.append(new_name)
        i += len(new_name)

    # Now that we have the names, it's time to undo what we did.

    # Remove the sentinel value.
    names[-1].pop()
    # Flatten the token sequence for the return type.
    return_type = [e for seq in names[:-1] for e in seq]
    # The class name is the last name.
    class_name = names[-1]
    return return_type, class_name
1199 def handle_bool(self
):
1202 def handle_char(self
):
1205 def handle_int(self
):
1208 def handle_long(self
):
1211 def handle_short(self
):
1214 def handle_double(self
):
1217 def handle_float(self
):
1220 def handle_void(self
):
1223 def handle_wchar_t(self
):
1226 def handle_unsigned(self
):
1229 def handle_signed(self
):
def _GetNestedType(self, ctor):
    """Parse a nested type (union/enum) declaration, definition, or variable.

    Args:
      ctor: node constructor (e.g. Union or Enum) taking
        (start, end, name, fields, namespace_stack).

    Returns:
      A node built by ctor, or a VariableDeclaration when the keyword
      introduces a variable of a previously declared type.
    """
    name = None
    name_tokens, token = self.GetName()
    if name_tokens:
        name = ''.join([t.name for t in name_tokens])

    # Handle forward declarations.
    if token.token_type == tokenize.SYNTAX and token.name == ';':
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    if token.token_type == tokenize.NAME and self._handling_typedef:
        self._AddBackToken(token)
        return ctor(token.start, token.end, name, None,
                    self.namespace_stack)

    # Must be the type declaration.
    fields = list(self._GetMatchingChar('{', '}'))
    del fields[-1]                  # Remove trailing '}'.
    if token.token_type == tokenize.SYNTAX and token.name == '{':
        next = self._GetNextToken()
        new_type = ctor(token.start, token.end, name, fields,
                        self.namespace_stack)
        # A name means this is an anonymous type and the name
        # is the variable declaration.
        if next.token_type != tokenize.NAME:
            return new_type
        name = new_type
        token = next

    # Must be variable declaration using the type prefixed with keyword.
    assert token.token_type == tokenize.NAME, token
    return self._CreateVariable(token, token.name, name, [], '', None)
def handle_struct(self):
    """Handle the 'struct' keyword: variable, method return type, or class.

    NOTE(review): branch structure around the visible fragments was
    restored (the `if name_tokens:` / `else:` skeleton and `t0` binding
    were missing); verify against version control.
    """
    # Special case the handling typedef/aliasing of structs here.
    # It would be a pain to handle in the class code.
    name_tokens, var_token = self.GetName()
    if name_tokens:
        next_token = self._GetNextToken()
        is_syntax = (var_token.token_type == tokenize.SYNTAX and
                     var_token.name[0] in '*&')
        is_variable = (var_token.token_type == tokenize.NAME and
                       next_token.name == ';')
        variable = var_token
        if is_syntax and not is_variable:
            variable = next_token
            temp = self._GetNextToken()
            if temp.token_type == tokenize.SYNTAX and temp.name == '(':
                # Handle methods declared to return a struct.
                t0 = name_tokens[0]
                struct = tokenize.Token(tokenize.NAME, 'struct',
                                        t0.start-7, t0.start-2)
                type_and_name = [struct]
                type_and_name.extend(name_tokens)
                type_and_name.extend((var_token, next_token))
                return self._GetMethod(type_and_name, 0, None, False)
            assert temp.name == ';', (temp, name_tokens, var_token)
        if is_syntax or (is_variable and not self._handling_typedef):
            modifiers = ['struct']
            type_name = ''.join([t.name for t in name_tokens])
            position = name_tokens[0]
            return self._CreateVariable(position, variable.name, type_name,
                                        modifiers, var_token.name, None)
        name_tokens.extend((var_token, next_token))
        self._AddBackTokens(name_tokens)
    else:
        self._AddBackToken(var_token)
    return self._GetClass(Struct, VISIBILITY_PUBLIC, None)
def handle_union(self):
    """Handle the 'union' keyword via the shared nested-type path."""
    return self._GetNestedType(Union)
def handle_enum(self):
    """Handle the 'enum' keyword via the shared nested-type path."""
    return self._GetNestedType(Enum)
1308 def handle_auto(self
):
1309 # TODO(nnorwitz): warn about using auto? Probably not since it
1310 # will be reclaimed and useful for C++0x.
1313 def handle_register(self
):
1316 def handle_const(self
):
1319 def handle_inline(self
):
1322 def handle_extern(self
):
1325 def handle_static(self
):
def handle_virtual(self):
    """Handle the 'virtual' keyword; what follows must be a method.

    Returns:
      A Method node with FUNCTION_VIRTUAL set (FUNCTION_DTOR added for
      virtual destructors).
    """
    # What follows must be a method.
    token = token2 = self._GetNextToken()
    if token.name == 'inline':
        # HACK(nnorwitz): handle inline dtors by ignoring 'inline'.
        token2 = self._GetNextToken()
    if token2.token_type == tokenize.SYNTAX and token2.name == '~':
        return self.GetMethod(FUNCTION_VIRTUAL + FUNCTION_DTOR, None)
    assert token.token_type == tokenize.NAME or token.name == '::', token
    return_type_and_name, _ = self._GetVarTokensUpToIgnoringTemplates(
        tokenize.SYNTAX, '(')  # )
    return_type_and_name.insert(0, token)
    if token2 is not token:
        return_type_and_name.insert(1, token2)
    # The visible call was truncated mid-argument-list; complete it with
    # templated_types=None and get_paren=False.
    return self._GetMethod(return_type_and_name, FUNCTION_VIRTUAL,
                           None, False)
1345 def handle_volatile(self
):
1348 def handle_mutable(self
):
def handle_public(self):
    """Handle a 'public:' access specifier inside a class body."""
    # Access specifiers are only valid inside a class.
    assert self.in_class
    self.visibility = VISIBILITY_PUBLIC
def handle_protected(self):
    """Handle a 'protected:' access specifier inside a class body."""
    # Access specifiers are only valid inside a class.
    assert self.in_class
    self.visibility = VISIBILITY_PROTECTED
def handle_private(self):
    """Handle a 'private:' access specifier inside a class body."""
    # Access specifiers are only valid inside a class.
    assert self.in_class
    self.visibility = VISIBILITY_PRIVATE
def handle_friend(self):
    """Build a Friend node from everything up to the terminating ';'.

    Returns:
      Friend node spanning the collected tokens.
    """
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert tokens
    # Fix: t0 was referenced without being bound (NameError at runtime).
    t0 = tokens[0]
    return Friend(t0.start, t0.end, tokens, self.namespace_stack)
1369 def handle_static_cast(self
):
1372 def handle_const_cast(self
):
1375 def handle_dynamic_cast(self
):
1378 def handle_reinterpret_cast(self
):
1381 def handle_new(self
):
def handle_delete(self):
    """Build a Delete node from the tokens preceding the ';'."""
    expr_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    first = expr_tokens[0]
    return Delete(first.start, first.end, expr_tokens)
1389 def handle_typedef(self
):
1390 token
= self
._GetNextToken
()
1391 if (token
.token_type
== tokenize
.NAME
and
1392 keywords
.IsKeyword(token
.name
)):
1393 # Token must be struct/enum/union/class.
1394 method
= getattr(self
, 'handle_' + token
.name
)
1395 self
._handling
_typedef
= True
1397 self
._handling
_typedef
= False
1401 # Get the remainder of the typedef up to the semi-colon.
1402 tokens
.extend(self
._GetTokensUpTo
(tokenize
.SYNTAX
, ';'))
1404 # TODO(nnorwitz): clean all this up.
1412 if name
.name
== ')':
1413 # HACK(nnorwitz): Handle pointers to functions "properly".
1414 if (len(tokens
) >= 4 and
1415 tokens
[1].name
== '(' and tokens
[2].name
== '*'):
1418 elif name
.name
== ']':
1419 # HACK(nnorwitz): Handle arrays properly.
1420 if len(tokens
) >= 2:
1424 if tokens
and isinstance(tokens
[0], tokenize
.Token
):
1425 new_type
= self
.converter
.ToType(tokens
)[0]
1426 return Typedef(indices
.start
, indices
.end
, name
.name
,
1427 new_type
, self
.namespace_stack
)
def handle_typeid(self):
    """Handle the 'typeid' keyword; no AST node is produced."""
    pass  # Not needed yet.
def handle_typename(self):
    """Handle the 'typename' keyword; no AST node is produced."""
    pass  # Not needed yet.
def _GetTemplatedTypes(self):
    """Parse the <...> of a template declaration.

    Returns:
      {type_param_name: (type_name Token or None, default tokens or None)}

    NOTE(review): the accumulator/index scaffolding (result, i,
    increments, return) was absent from the visible fragments and has
    been restored; verify against version control.
    """
    result = {}
    tokens = list(self._GetMatchingChar('<', '>'))
    len_tokens = len(tokens) - 1   # Ignore trailing '>'.
    i = 0
    while i < len_tokens:
        key = tokens[i].name
        i += 1
        # Skip 'typename'/'class' keywords and separating commas.
        if keywords.IsKeyword(key) or key == ',':
            continue
        type_name = default = None
        if i < len_tokens:
            i += 1
            if tokens[i-1].name == '=':
                assert i < len_tokens, '%s %s' % (i, tokens)
                default, unused_next_token = self.GetName(tokens[i:])
                i += len(default)
            if tokens[i-1].name != ',':
                # We got something like: Type variable.
                # Re-adjust the key (variable) and type_name (Type).
                key = tokens[i-1].name
                type_name = tokens[i-2]
        result[key] = (type_name, default)
    return result
def handle_template(self):
    """Handle a 'template<...>' declaration (class, struct, friend,
    method, or variable definition).

    Returns:
      A node for the templated declaration, or None for a variable
      definition (currently not modeled).
    """
    token = self._GetNextToken()
    assert token.token_type == tokenize.SYNTAX, token
    assert token.name == '<', token
    templated_types = self._GetTemplatedTypes()
    # TODO(nnorwitz): for now, just ignore the template params.
    token = self._GetNextToken()
    if token.token_type == tokenize.NAME:
        if token.name == 'class':
            return self._GetClass(Class, VISIBILITY_PRIVATE, templated_types)
        elif token.name == 'struct':
            return self._GetClass(Struct, VISIBILITY_PUBLIC, templated_types)
        elif token.name == 'friend':
            return self.handle_friend()
    self._AddBackToken(token)
    tokens, last = self._GetVarTokensUpTo(tokenize.SYNTAX, '(', ';')
    # Fix: the terminating token must be pushed back too, otherwise it is
    # silently dropped from the stream.
    tokens.append(last)
    self._AddBackTokens(tokens)
    if last.name == '(':
        return self.GetMethod(FUNCTION_NONE, templated_types)
    # Must be a variable definition.
    return None
def handle_true(self):
    """'true' literal requires no AST node."""
    pass  # Nothing to do.
def handle_false(self):
    """'false' literal requires no AST node."""
    pass  # Nothing to do.
def handle_asm(self):
    """Handle the 'asm' keyword; no AST node is produced."""
    pass  # Not needed yet.
def handle_class(self):
    """Handle the 'class' keyword; members default to private visibility."""
    return self._GetClass(Class, VISIBILITY_PRIVATE, None)
def _GetBases(self):
    """Parse a class's base-class list after ':'.

    Returns:
      ([base type nodes], token): the token is the '{' that opens the
      class body.

    NOTE(review): the loop skeleton (while/else/break/return) was absent
    from the visible fragments and has been restored; verify against
    version control.
    """
    # Get base classes.
    bases = []
    while 1:
        token = self._GetNextToken()
        assert token.token_type == tokenize.NAME, token
        # TODO(nnorwitz): store kind of inheritance...maybe.
        if token.name not in ('public', 'protected', 'private'):
            # If inheritance type is not specified, it is private.
            # Just put the token back so we can form a name.
            # TODO(nnorwitz): it would be good to warn about this.
            self._AddBackToken(token)
        else:
            # Check for virtual inheritance.
            token = self._GetNextToken()
            if token.name != 'virtual':
                self._AddBackToken(token)
            else:
                # TODO(nnorwitz): store that we got virtual for this base.
                pass
        base, next_token = self.GetName()
        bases_ast = self.converter.ToType(base)
        assert len(bases_ast) == 1, bases_ast
        bases.append(bases_ast[0])
        assert next_token.token_type == tokenize.SYNTAX, next_token
        if next_token.name == '{':
            token = next_token
            break
        # Support multiple inheritance.
        assert next_token.name == ',', next_token
    return bases, token
1529 def _GetClass(self
, class_type
, visibility
, templated_types
):
1531 class_token
= self
._GetNextToken
()
1532 if class_token
.token_type
!= tokenize
.NAME
:
1533 assert class_token
.token_type
== tokenize
.SYNTAX
, class_token
1536 # Skip any macro (e.g. storage class specifiers) after the
1538 next_token
= self
._GetNextToken
()
1539 if next_token
.token_type
== tokenize
.NAME
:
1540 self
._AddBackToken
(next_token
)
1542 self
._AddBackTokens
([class_token
, next_token
])
1543 name_tokens
, token
= self
.GetName()
1544 class_name
= ''.join([t
.name
for t
in name_tokens
])
1546 if token
.token_type
== tokenize
.SYNTAX
:
1547 if token
.name
== ';':
1548 # Forward declaration.
1549 return class_type(class_token
.start
, class_token
.end
,
1550 class_name
, None, templated_types
, None,
1551 self
.namespace_stack
)
1552 if token
.name
in '*&':
1553 # Inline forward declaration. Could be method or data.
1554 name_token
= self
._GetNextToken
()
1555 next_token
= self
._GetNextToken
()
1556 if next_token
.name
== ';':
1558 modifiers
= ['class']
1559 return self
._CreateVariable
(class_token
, name_token
.name
,
1561 modifiers
, token
.name
, None)
1563 # Assume this is a method.
1564 tokens
= (class_token
, token
, name_token
, next_token
)
1565 self
._AddBackTokens
(tokens
)
1566 return self
.GetMethod(FUNCTION_NONE
, None)
1567 if token
.name
== ':':
1568 bases
, token
= self
._GetBases
()
1571 if token
.token_type
== tokenize
.SYNTAX
and token
.name
== '{':
1572 assert token
.token_type
== tokenize
.SYNTAX
, token
1573 assert token
.name
== '{', token
1575 ast
= AstBuilder(self
.GetScope(), self
.filename
, class_name
,
1576 visibility
, self
.namespace_stack
)
1577 body
= list(ast
.Generate())
1579 if not self
._handling
_typedef
:
1580 token
= self
._GetNextToken
()
1581 if token
.token_type
!= tokenize
.NAME
:
1582 assert token
.token_type
== tokenize
.SYNTAX
, token
1583 assert token
.name
== ';', token
1585 new_class
= class_type(class_token
.start
, class_token
.end
,
1586 class_name
, bases
, None,
1587 body
, self
.namespace_stack
)
1590 return self
._CreateVariable
(class_token
,
1591 token
.name
, new_class
,
1592 modifiers
, token
.name
, None)
1594 if not self
._handling
_typedef
:
1595 self
.HandleError('non-typedef token', token
)
1596 self
._AddBackToken
(token
)
1598 return class_type(class_token
.start
, class_token
.end
, class_name
,
1599 bases
, templated_types
, body
, self
.namespace_stack
)
def handle_namespace(self):
    """Handle a namespace block or namespace alias.

    Pushes the namespace name (None when anonymous) on namespace_stack
    and arranges for an internal pop token to restore it when the scope
    closes.

    NOTE(review): the `name = None` guard, `else:` arm, and Token
    constructor tail were absent from the visible fragments and have
    been restored; verify against version control.
    """
    # Support anonymous namespaces.
    name = None
    name_tokens, token = self.GetName()
    if name_tokens:
        name = ''.join([t.name for t in name_tokens])
    self.namespace_stack.append(name)
    assert token.token_type == tokenize.SYNTAX, token
    # Create an internal token that denotes when the namespace is complete.
    internal_token = tokenize.Token(_INTERNAL_TOKEN, _NAMESPACE_POP,
                                    None, None)
    internal_token.whence = token.whence
    if token.name == '=':
        # TODO(nnorwitz): handle aliasing namespaces.
        name, next_token = self.GetName()
        assert next_token.name == ';', next_token
        self._AddBackToken(internal_token)
    else:
        assert token.name == '{', token
        tokens = list(self.GetScope())
        # Replace the trailing } with the internal namespace pop token.
        tokens[-1] = internal_token
        # Handle namespace with nothing in it.
        self._AddBackTokens(tokens)
    return None
def handle_using(self):
    """Build a Using node from everything up to the ';'."""
    decl_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    first = decl_tokens[0]
    return Using(first.start, first.end, decl_tokens)
def handle_explicit(self):
    """Handle an 'explicit' constructor declaration inside a class."""
    assert self.in_class
    # Nothing much to do.
    # TODO(nnorwitz): maybe verify the method name == class name.
    # This must be a ctor.
    return self.GetMethod(FUNCTION_CTOR, None)
def handle_this(self):
    """'this' requires no AST node."""
    pass  # Nothing to do.
1642 def handle_operator(self
):
1643 # Pull off the next token(s?) and make that part of the method name.
1646 def handle_sizeof(self
):
1649 def handle_case(self
):
1652 def handle_switch(self
):
def handle_default(self):
    """Consume the ':' that follows a 'default' label in a switch."""
    colon = self._GetNextToken()
    assert colon.token_type == tokenize.SYNTAX
    assert colon.name == ':'
1660 def handle_if(self
):
1663 def handle_else(self
):
def handle_return(self):
    """Build a Return node from the expression tokens before the ';'.

    Returns:
      Return node; expression is None for a bare 'return;'.
    """
    tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    # Fix: without this guard the second return was unreachable and a
    # bare 'return;' vs 'return expr;' could not be distinguished.
    if not tokens:
        return Return(self.current_token.start, self.current_token.end, None)
    return Return(tokens[0].start, tokens[0].end, tokens)
def handle_goto(self):
    """Build a Goto node from the single label token before the ';'."""
    label_tokens = self._GetTokensUpTo(tokenize.SYNTAX, ';')
    assert len(label_tokens) == 1, str(label_tokens)
    label = label_tokens[0]
    return Goto(label.start, label.end, label.name)
def handle_try(self):
    """Handle the 'try' keyword; exception handling is not modeled."""
    pass  # Not needed yet.
def handle_catch(self):
    """Handle the 'catch' keyword; exception handling is not modeled."""
    pass  # Not needed yet.
def handle_throw(self):
    """Handle the 'throw' keyword; exception handling is not modeled."""
    pass  # Not needed yet.
1686 def handle_while(self
):
1689 def handle_do(self
):
1692 def handle_for(self
):
def handle_break(self):
    """Skip a 'break' statement up to ';'; flow control is not modeled."""
    self._IgnoreUpTo(tokenize.SYNTAX, ';')
def handle_continue(self):
    """Skip a 'continue' statement up to ';'; flow control is not modeled."""
    self._IgnoreUpTo(tokenize.SYNTAX, ';')
def BuilderFromSource(source, filename):
    """Utility method that returns an AstBuilder from source code.

    Args:
      source: 'C++ source code'
      filename: 'file1'

    Returns:
      AstBuilder
    """
    return AstBuilder(tokenize.GetTokens(source), filename)
def PrintIndentifiers(filename, should_print):
    """Prints all identifiers for a C++ source file.

    Args:
      filename: 'file1'
      should_print: predicate with signature: bool Function(token)

    NOTE(review): the try/except skeleton around Generate() was absent
    from the visible fragments ('except KeyboardInterrupt:' appeared
    with no try); restored — verify against version control.
    """
    source = utils.ReadFile(filename, False)
    if source is None:
        sys.stderr.write('Unable to find: %s\n' % filename)
        return

    #print('Processing %s' % actual_filename)
    builder = BuilderFromSource(source, filename)
    try:
        for node in builder.Generate():
            if should_print(node):
                print(node.name)
    except KeyboardInterrupt:
        return
    except Exception:
        # Best-effort: parse errors in one file should not abort callers.
        pass
def PrintAllIndentifiers(filenames, should_print):
    """Prints all identifiers for each C++ source file in filenames.

    Args:
      filenames: ['file1', 'file2', ...]
      should_print: predicate with signature: bool Function(token)
    """
    for path in filenames:
        PrintIndentifiers(path, should_print)
1751 for filename
in argv
[1:]:
1752 source
= utils
.ReadFile(filename
)
1756 print('Processing %s' % filename
)
1757 builder
= BuilderFromSource(source
, filename
)
1759 entire_ast
= filter(None, builder
.Generate())
1760 except KeyboardInterrupt:
1763 # Already printed a warning, print the traceback and continue.
1764 traceback
.print_exc()
1767 for ast
in entire_ast
:
1771 if __name__
== '__main__':