3 # Copyright 2008, Google Inc.
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
10 # * Redistributions of source code must retain the above copyright
11 # notice, this list of conditions and the following disclaimer.
12 # * Redistributions in binary form must reproduce the above
13 # copyright notice, this list of conditions and the following disclaimer
14 # in the documentation and/or other materials provided with the
16 # * Neither the name of Google Inc. nor the names of its
17 # contributors may be used to endorse or promote products derived from
18 # this software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 """pump v0.2.0 - Pretty Useful for Meta Programming.
34 A tool for preprocessor meta programming. Useful for generating
35 repetitive boilerplate code. Especially useful for writing C++
36 classes, functions, macros, and templates that need to work with
37 a varying number of arguments.
44 Converts foo.cc.pump to foo.cc.
48 ATOMIC_CODE ::= $var ID = EXPRESSION
49 | $var ID = [[ CODE ]]
50 | $range ID EXPRESSION..EXPRESSION
51 | $for ID SEPARATOR [[ CODE ]]
55 | $if EXPRESSION [[ CODE ]] ELSE_BRANCH
58 SEPARATOR ::= RAW_CODE | EMPTY
59 ELSE_BRANCH ::= $else [[ CODE ]]
60 | $elif EXPRESSION [[ CODE ]] ELSE_BRANCH
62 EXPRESSION has Python syntax.
65 __author__
= 'wan@google.com (Zhanyong Wan)'
73 (re
.compile(r
'\$var\s+'), '$var'),
74 (re
.compile(r
'\$elif\s+'), '$elif'),
75 (re
.compile(r
'\$else\s+'), '$else'),
76 (re
.compile(r
'\$for\s+'), '$for'),
77 (re
.compile(r
'\$if\s+'), '$if'),
78 (re
.compile(r
'\$range\s+'), '$range'),
79 (re
.compile(r
'\$[_A-Za-z]\w*'), '$id'),
80 (re
.compile(r
'\$\(\$\)'), '$($)'),
81 (re
.compile(r
'\$'), '$'),
82 (re
.compile(r
'\[\[\n?'), '[['),
83 (re
.compile(r
'\]\]\n?'), ']]'),
88 """Represents a position (line and column) in a text file."""
90 def __init__(self
, line
=-1, column
=-1):
94 def __eq__(self
, rhs
):
95 return self
.line
== rhs
.line
and self
.column
== rhs
.column
97 def __ne__(self
, rhs
):
98 return not self
== rhs
100 def __lt__(self
, rhs
):
101 return self
.line
< rhs
.line
or (
102 self
.line
== rhs
.line
and self
.column
< rhs
.column
)
104 def __le__(self
, rhs
):
105 return self
< rhs
or self
== rhs
107 def __gt__(self
, rhs
):
110 def __ge__(self
, rhs
):
117 return '%s(%s)' % (self
.line
+ 1, self
.column
)
119 def __add__(self
, offset
):
120 return Cursor(self
.line
, self
.column
+ offset
)
122 def __sub__(self
, offset
):
123 return Cursor(self
.line
, self
.column
- offset
)
126 """Returns a copy of self."""
128 return Cursor(self
.line
, self
.column
)
131 # Special cursor to indicate the end-of-file.
133 """Returns the special cursor to denote the end-of-file."""
134 return Cursor(-1, -1)
138 """Represents a token in a Pump source file."""
140 def __init__(self
, start
=None, end
=None, value
=None, token_type
=None):
150 self
.token_type
= token_type
153 return 'Token @%s: \'%s\' type=%s' % (
154 self
.start
, self
.value
, self
.token_type
)
157 """Returns a copy of self."""
159 return Token(self
.start
.Clone(), self
.end
.Clone(), self
.value
,
def StartsWith(lines, pos, string):
    """Tell whether the text at a position begins with the given string.

    Args:
      lines: Indexable collection of line strings.
      pos: Object exposing `line` (index into `lines`) and `column`
        (offset inside that line).
      string: The prefix to look for.

    Returns:
      True iff the part of line `pos.line` that begins at `pos.column`
      starts with `string`.  Only that one line is inspected.
    """
    remainder = lines[pos.line][pos.column:]
    return remainder.startswith(string)
169 def FindFirstInLine(line
, token_table
):
170 best_match_start
= -1
171 for (regex
, token_type
) in token_table
:
172 m
= regex
.search(line
)
174 # We found regex in lines
175 if best_match_start
< 0 or m
.start() < best_match_start
:
176 best_match_start
= m
.start()
177 best_match_length
= m
.end() - m
.start()
178 best_match_token_type
= token_type
180 if best_match_start
< 0:
183 return (best_match_start
, best_match_length
, best_match_token_type
)
186 def FindFirst(lines
, token_table
, cursor
):
187 """Finds the first occurrence of any string in strings in lines."""
189 start
= cursor
.Clone()
190 cur_line_number
= cursor
.line
191 for line
in lines
[start
.line
:]:
192 if cur_line_number
== start
.line
:
193 line
= line
[start
.column
:]
194 m
= FindFirstInLine(line
, token_table
)
196 # We found a regex in line.
197 (start_column
, length
, token_type
) = m
198 if cur_line_number
== start
.line
:
199 start_column
+= start
.column
200 found_start
= Cursor(cur_line_number
, start_column
)
201 found_end
= found_start
+ length
202 return MakeToken(lines
, found_start
, found_end
, token_type
)
204 # We failed to find str in lines
208 def SubString(lines
, start
, end
):
209 """Returns a substring in lines."""
212 end
= Cursor(len(lines
) - 1, len(lines
[-1]))
217 if start
.line
== end
.line
:
218 return lines
[start
.line
][start
.column
:end
.column
]
220 result_lines
= ([lines
[start
.line
][start
.column
:]] +
221 lines
[start
.line
+ 1:end
.line
] +
222 [lines
[end
.line
][:end
.column
]])
223 return ''.join(result_lines
)
def StripMetaComments(str):
    """Remove `$$` meta comments from the given text.

    Args:
      str: The Pump source text (may span many lines).

    Returns:
      The text with every `$$ ...` comment removed.
    """
    # Pass 1: drop a leading comment-only line together with its newline.
    # The pattern is applied without re.MULTILINE, so '^' anchors only at
    # the very start of the text; '\s*' may also swallow blank lines that
    # precede that first comment.
    without_leading_comment = re.sub(r'^\s*\$\$.*\n', '', str)

    # Pass 2: drop every remaining meta comment along with the whitespace
    # (including a preceding newline) that comes directly before it.
    return re.sub(r'\s*\$\$.*', '', without_leading_comment)
def MakeToken(lines, start, end, token_type):
    """Build a Token whose value is the text between two positions.

    Args:
      lines: The source lines the token is taken from.
      start: Position where the token begins.
      end: Position where the token ends.
      token_type: Type tag stored on the new token.

    Returns:
      A Token constructed from `start`, `end`, the result of
      SubString(lines, start, end), and `token_type`.
    """
    token_text = SubString(lines, start, end)
    return Token(start, end, token_text, token_type)
243 def ParseToken(lines
, pos
, regex
, token_type
):
244 line
= lines
[pos
.line
][pos
.column
:]
245 m
= regex
.search(line
)
246 if m
and not m
.start():
247 return MakeToken(lines
, pos
, pos
+ m
.end(), token_type
)
249 print 'ERROR: %s expected at %s.' % (token_type
, pos
)
# Pre-compiled patterns used while tokenizing.

# An identifier: a letter or underscore followed by word characters.
ID_REGEX = re.compile(r'[_A-Za-z]\w*')

# A single '=' sign.
EQ_REGEX = re.compile(r'=')

# Shortest run of characters that ends right before '$$' or at end of line.
REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)')

# Zero or more whitespace characters.
OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*')

# Exactly one whitespace character.
WHITE_SPACE_REGEX = re.compile(r'\s')

# The literal two-character sequence '..'.
DOT_DOT_REGEX = re.compile(r'\.\.')
261 def Skip(lines
, pos
, regex
):
262 line
= lines
[pos
.line
][pos
.column
:]
263 m
= re
.search(regex
, line
)
264 if m
and not m
.start():
270 def SkipUntil(lines
, pos
, regex
, token_type
):
271 line
= lines
[pos
.line
][pos
.column
:]
272 m
= re
.search(regex
, line
)
274 return pos
+ m
.start()
276 print ('ERROR: %s expected on line %s after column %s.' %
277 (token_type
, pos
.line
+ 1, pos
.column
))
281 def ParseExpTokenInParens(lines
, pos
):
282 def ParseInParens(pos
):
283 pos
= Skip(lines
, pos
, OPTIONAL_WHITE_SPACES_REGEX
)
284 pos
= Skip(lines
, pos
, r
'\(')
286 pos
= Skip(lines
, pos
, r
'\)')
290 pos
= SkipUntil(lines
, pos
, r
'\(|\)', ')')
291 if SubString(lines
, pos
, pos
+ 1) == '(':
293 pos
= Skip(lines
, pos
, r
'\)')
299 pos
= ParseInParens(pos
)
300 return MakeToken(lines
, start
, pos
, 'exp')
303 def RStripNewLineFromToken(token
):
304 if token
.value
.endswith('\n'):
305 return Token(token
.start
, token
.end
, token
.value
[:-1], token
.token_type
)
310 def TokenizeLines(lines
, pos
):
312 found
= FindFirst(lines
, TOKEN_TABLE
, pos
)
314 yield MakeToken(lines
, pos
, Eof(), 'code')
317 if found
.start
== pos
:
319 prev_token_rstripped
= None
321 prev_token
= MakeToken(lines
, pos
, found
.start
, 'code')
322 prev_token_rstripped
= RStripNewLineFromToken(prev_token
)
324 if found
.token_type
== '$var':
325 if prev_token_rstripped
:
326 yield prev_token_rstripped
328 id_token
= ParseToken(lines
, found
.end
, ID_REGEX
, 'id')
330 pos
= Skip(lines
, id_token
.end
, OPTIONAL_WHITE_SPACES_REGEX
)
332 eq_token
= ParseToken(lines
, pos
, EQ_REGEX
, '=')
334 pos
= Skip(lines
, eq_token
.end
, r
'\s*')
336 if SubString(lines
, pos
, pos
+ 2) != '[[':
337 exp_token
= ParseToken(lines
, pos
, REST_OF_LINE_REGEX
, 'exp')
339 pos
= Cursor(exp_token
.end
.line
+ 1, 0)
340 elif found
.token_type
== '$for':
341 if prev_token_rstripped
:
342 yield prev_token_rstripped
344 id_token
= ParseToken(lines
, found
.end
, ID_REGEX
, 'id')
346 pos
= Skip(lines
, id_token
.end
, WHITE_SPACE_REGEX
)
347 elif found
.token_type
== '$range':
348 if prev_token_rstripped
:
349 yield prev_token_rstripped
351 id_token
= ParseToken(lines
, found
.end
, ID_REGEX
, 'id')
353 pos
= Skip(lines
, id_token
.end
, OPTIONAL_WHITE_SPACES_REGEX
)
355 dots_pos
= SkipUntil(lines
, pos
, DOT_DOT_REGEX
, '..')
356 yield MakeToken(lines
, pos
, dots_pos
, 'exp')
357 yield MakeToken(lines
, dots_pos
, dots_pos
+ 2, '..')
359 new_pos
= Cursor(pos
.line
+ 1, 0)
360 yield MakeToken(lines
, pos
, new_pos
, 'exp')
362 elif found
.token_type
== '$':
366 exp_token
= ParseExpTokenInParens(lines
, found
.end
)
369 elif (found
.token_type
== ']]' or found
.token_type
== '$if' or
370 found
.token_type
== '$elif' or found
.token_type
== '$else'):
371 if prev_token_rstripped
:
372 yield prev_token_rstripped
383 """A generator that yields the tokens in the given string."""
385 lines
= s
.splitlines(True)
386 for token
in TokenizeLines(lines
, Cursor(0, 0)):
391 def __init__(self
, atomic_code_list
=None):
392 self
.atomic_code
= atomic_code_list
396 def __init__(self
, identifier
=None, atomic_code
=None):
397 self
.identifier
= identifier
398 self
.atomic_code
= atomic_code
402 def __init__(self
, identifier
=None, exp1
=None, exp2
=None):
403 self
.identifier
= identifier
409 def __init__(self
, identifier
=None, sep
=None, code
=None):
410 self
.identifier
= identifier
416 def __init__(self
, else_branch
=None):
417 self
.else_branch
= else_branch
421 def __init__(self
, exp
=None, then_branch
=None, else_branch
=None):
423 self
.then_branch
= then_branch
424 self
.else_branch
= else_branch
428 def __init__(self
, token
=None):
429 self
.raw_code
= token
432 class LiteralDollarNode
:
433 def __init__(self
, token
):
438 def __init__(self
, token
, python_exp
):
440 self
.python_exp
= python_exp
443 def PopFront(a_list
):
449 def PushFront(a_list
, elem
):
453 def PopToken(a_list
, token_type
=None):
454 token
= PopFront(a_list
)
455 if token_type
is not None and token
.token_type
!= token_type
:
456 print 'ERROR: %s expected at %s' % (token_type
, token
.start
)
457 print 'ERROR: %s found instead' % (token
,)
463 def PeekToken(a_list
):
def ParseExpNode(token):
    """Turn an expression token into an ExpNode.

    Every identifier-shaped run of characters in `token.value` is rewritten
    as a `self.GetValue("<identifier>")` call; the rewritten text is stored
    as the node's Python expression.

    Args:
      token: Token whose `value` holds the expression text.

    Returns:
      ExpNode(token, <rewritten expression text>).
    """
    identifier_pattern = r'([_A-Za-z]\w*)'
    lookup_template = r'self.GetValue("\1")'
    translated = re.sub(identifier_pattern, lookup_template, token.value)
    return ExpNode(token, translated)
475 def ParseElseNode(tokens
):
476 def Pop(token_type
=None):
477 return PopToken(tokens
, token_type
)
479 next
= PeekToken(tokens
)
482 if next
.token_type
== '$else':
485 code_node
= ParseCodeNode(tokens
)
488 elif next
.token_type
== '$elif':
492 code_node
= ParseCodeNode(tokens
)
494 inner_else_node
= ParseElseNode(tokens
)
495 return CodeNode([IfNode(ParseExpNode(exp
), code_node
, inner_else_node
)])
496 elif not next
.value
.strip():
498 return ParseElseNode(tokens
)
503 def ParseAtomicCodeNode(tokens
):
504 def Pop(token_type
=None):
505 return PopToken(tokens
, token_type
)
507 head
= PopFront(tokens
)
510 return RawCodeNode(head
)
514 next
= PeekToken(tokens
)
515 if next
.token_type
== 'exp':
517 return VarNode(id_token
, ParseExpNode(exp_token
))
519 code_node
= ParseCodeNode(tokens
)
521 return VarNode(id_token
, code_node
)
524 next_token
= PeekToken(tokens
)
525 if next_token
.token_type
== 'code':
526 sep_token
= next_token
531 code_node
= ParseCodeNode(tokens
)
533 return ForNode(id_token
, sep_token
, code_node
)
535 exp_token
= Pop('code')
537 code_node
= ParseCodeNode(tokens
)
539 else_node
= ParseElseNode(tokens
)
540 return IfNode(ParseExpNode(exp_token
), code_node
, else_node
)
543 exp1_token
= Pop('exp')
545 exp2_token
= Pop('exp')
546 return RangeNode(id_token
, ParseExpNode(exp1_token
),
547 ParseExpNode(exp2_token
))
549 return ParseExpNode(Token(head
.start
+ 1, head
.end
, head
.value
[1:], 'id'))
551 return LiteralDollarNode(head
)
553 exp_token
= Pop('exp')
554 return ParseExpNode(exp_token
)
556 code_node
= ParseCodeNode(tokens
)
560 PushFront(tokens
, head
)
564 def ParseCodeNode(tokens
):
565 atomic_code_list
= []
569 atomic_code_node
= ParseAtomicCodeNode(tokens
)
571 atomic_code_list
.append(atomic_code_node
)
574 return CodeNode(atomic_code_list
)
577 def ParseToAST(pump_src_text
):
578 """Convert the given Pump source text into an AST."""
579 tokens
= list(Tokenize(pump_src_text
))
580 code_node
= ParseCodeNode(tokens
)
591 clone
.variables
= self
.variables
[:]
592 clone
.ranges
= self
.ranges
[:]
595 def PushVariable(self
, var
, value
):
596 # If value looks like an int, store it as an int.
598 int_value
= int(value
)
599 if ('%s' % int_value
) == value
:
603 self
.variables
[:0] = [(var
, value
)]
605 def PopVariable(self
):
606 self
.variables
[:1] = []
608 def PushRange(self
, var
, lower
, upper
):
609 self
.ranges
[:0] = [(var
, lower
, upper
)]
614 def GetValue(self
, identifier
):
615 for (var
, value
) in self
.variables
:
616 if identifier
== var
:
619 print 'ERROR: meta variable %s is undefined.' % (identifier
,)
622 def EvalExp(self
, exp
):
624 result
= eval(exp
.python_exp
)
626 print 'ERROR: caught exception %s: %s' % (e
.__class
__.__name
__, e
)
627 print ('ERROR: failed to evaluate meta expression %s at %s' %
628 (exp
.python_exp
, exp
.token
.start
))
632 def GetRange(self
, identifier
):
633 for (var
, lower
, upper
) in self
.ranges
:
634 if identifier
== var
:
635 return (lower
, upper
)
637 print 'ERROR: range %s is undefined.' % (identifier
,)
645 def GetLastLine(self
):
646 index
= self
.string
.rfind('\n')
650 return self
.string
[index
+ 1:]
656 def RunAtomicCode(env
, node
, output
):
657 if isinstance(node
, VarNode
):
658 identifier
= node
.identifier
.value
.strip()
660 RunAtomicCode(env
.Clone(), node
.atomic_code
, result
)
661 value
= result
.string
662 env
.PushVariable(identifier
, value
)
663 elif isinstance(node
, RangeNode
):
664 identifier
= node
.identifier
.value
.strip()
665 lower
= int(env
.EvalExp(node
.exp1
))
666 upper
= int(env
.EvalExp(node
.exp2
))
667 env
.PushRange(identifier
, lower
, upper
)
668 elif isinstance(node
, ForNode
):
669 identifier
= node
.identifier
.value
.strip()
674 (lower
, upper
) = env
.GetRange(identifier
)
675 for i
in range(lower
, upper
+ 1):
676 new_env
= env
.Clone()
677 new_env
.PushVariable(identifier
, i
)
678 RunCode(new_env
, node
.code
, output
)
681 elif isinstance(node
, RawCodeNode
):
682 output
.Append(node
.raw_code
.value
)
683 elif isinstance(node
, IfNode
):
684 cond
= env
.EvalExp(node
.exp
)
686 RunCode(env
.Clone(), node
.then_branch
, output
)
687 elif node
.else_branch
is not None:
688 RunCode(env
.Clone(), node
.else_branch
, output
)
689 elif isinstance(node
, ExpNode
):
690 value
= env
.EvalExp(node
)
691 output
.Append('%s' % (value
,))
692 elif isinstance(node
, LiteralDollarNode
):
694 elif isinstance(node
, CodeNode
):
695 RunCode(env
.Clone(), node
, output
)
def RunCode(env, code_node, output):
    """Run every element of `code_node.atomic_code`, in order.

    Args:
      env: Environment object handed unchanged to each RunAtomicCode call.
      code_node: Node whose `atomic_code` attribute is iterated.
      output: Output object handed unchanged to each RunAtomicCode call.
    """
    for child in code_node.atomic_code:
        RunAtomicCode(env, child, output)
def IsSingleLineComment(cur_line):
    """Report whether the text '//' occurs anywhere in `cur_line`.

    This is a plain substring check: a '//' inside a string literal
    counts as well.
    """
    return cur_line.find('//') >= 0
711 def IsInPreprocessorDirective(prev_lines
, cur_line
):
712 if cur_line
.lstrip().startswith('#'):
714 return prev_lines
and prev_lines
[-1].endswith('\\')
717 def WrapComment(line
, output
):
718 loc
= line
.find('//')
719 before_comment
= line
[:loc
].rstrip()
720 if before_comment
== '':
723 output
.append(before_comment
)
724 indent
= len(before_comment
) - len(before_comment
.lstrip())
725 prefix
= indent
*' ' + '// '
726 max_len
= 80 - len(prefix
)
727 comment
= line
[loc
+ 2:].strip()
728 segs
= [seg
for seg
in re
.split(r
'(\w+\W*)', comment
) if seg
!= '']
731 if len((cur_line
+ seg
).rstrip()) < max_len
:
734 if cur_line
.strip() != '':
735 output
.append(prefix
+ cur_line
.rstrip())
736 cur_line
= seg
.lstrip()
737 if cur_line
.strip() != '':
738 output
.append(prefix
+ cur_line
.strip())
741 def WrapCode(line
, line_concat
, output
):
742 indent
= len(line
) - len(line
.lstrip())
743 prefix
= indent
*' ' # Prefix of the current line
744 max_len
= 80 - indent
- len(line_concat
) # Maximum length of the current line
745 new_prefix
= prefix
+ 4*' ' # Prefix of a continuation line
746 new_max_len
= max_len
- 4 # Maximum length of a continuation line
747 # Prefers to wrap a line after a ',' or ';'.
748 segs
= [seg
for seg
in re
.split(r
'([^,;]+[,;]?)', line
.strip()) if seg
!= '']
749 cur_line
= '' # The current line without leading spaces.
751 # If the line is still too long, wrap at a space.
752 while cur_line
== '' and len(seg
.strip()) > max_len
:
754 split_at
= seg
.rfind(' ', 0, max_len
)
755 output
.append(prefix
+ seg
[:split_at
].strip() + line_concat
)
756 seg
= seg
[split_at
+ 1:]
758 max_len
= new_max_len
760 if len((cur_line
+ seg
).rstrip()) < max_len
:
761 cur_line
= (cur_line
+ seg
).lstrip()
763 output
.append(prefix
+ cur_line
.rstrip() + line_concat
)
765 max_len
= new_max_len
766 cur_line
= seg
.lstrip()
767 if cur_line
.strip() != '':
768 output
.append(prefix
+ cur_line
.strip())
def WrapPreprocessorDirective(line, output):
    """Wrap a preprocessor-directive line by delegating to WrapCode.

    Args:
      line: The line to wrap.
      output: Passed through to WrapCode as its output argument.
    """
    # Passed as WrapCode's line_concat argument: a space followed by a
    # backslash.
    continuation_marker = ' \\'
    WrapCode(line, continuation_marker, output)
def WrapPlainCode(line, output):
    """Wrap an ordinary code line by delegating to WrapCode.

    Args:
      line: The line to wrap.
      output: Passed through to WrapCode as its output argument.
    """
    # Passed as WrapCode's line_concat argument: the empty string.
    no_continuation_marker = ''
    WrapCode(line, no_continuation_marker, output)
def IsMultiLineIWYUPragma(line):
    """Look for the text '/* IWYU pragma: ' anywhere in `line`.

    Returns:
      The regex match object for the first occurrence (truthy), or None
      when the text is absent.
    """
    pragma_opener = re.compile(r'/\* IWYU pragma: ')
    return pragma_opener.search(line)
def IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
    """Check whether `line` is one of the forms the caller must not wrap.

    Three patterns are tried in order:
      1. a whole-line `#ifndef NAME`, `#define NAME` or `#endif // NAME`;
      2. a line beginning with `#include` followed by whitespace;
      3. a line containing `// IWYU pragma: ` anywhere.

    Returns:
      The match object of the first pattern that matches (truthy), or
      None when none of them does.
    """
    guard_match = re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line)
    if guard_match:
        return guard_match

    include_match = re.match(r'^#include\s', line)
    if include_match:
        return include_match

    # Don't break IWYU pragmas, either; that causes iwyu.py problems.
    return re.search(r'// IWYU pragma: ', line)
790 def WrapLongLine(line
, output
):
794 elif IsSingleLineComment(line
):
795 if IsHeaderGuardIncludeOrOneLineIWYUPragma(line
):
796 # The style guide made an exception to allow long header guard lines,
797 # includes and IWYU pragmas.
800 WrapComment(line
, output
)
801 elif IsInPreprocessorDirective(output
, line
):
802 if IsHeaderGuardIncludeOrOneLineIWYUPragma(line
):
803 # The style guide made an exception to allow long header guard lines,
804 # includes and IWYU pragmas.
807 WrapPreprocessorDirective(line
, output
)
808 elif IsMultiLineIWYUPragma(line
):
811 WrapPlainCode(line
, output
)
814 def BeautifyCode(string
):
815 lines
= string
.splitlines()
818 WrapLongLine(line
, output
)
819 output2
= [line
.rstrip() for line
in output
]
820 return '\n'.join(output2
) + '\n'
823 def ConvertFromPumpSource(src_text
):
824 """Return the text generated from the given Pump source text."""
825 ast
= ParseToAST(StripMetaComments(src_text
))
827 RunCode(Env(), ast
, output
)
828 return BeautifyCode(output
.string
)
837 output_str
= ConvertFromPumpSource(file(file_path
, 'r').read())
838 if file_path
.endswith('.pump'):
839 output_file_path
= file_path
[:-5]
841 output_file_path
= '-'
842 if output_file_path
== '-':
845 output_file
= file(output_file_path
, 'w')
846 output_file
.write('// This file was GENERATED by command:\n')
847 output_file
.write('// %s %s\n' %
848 (os
.path
.basename(__file__
), os
.path
.basename(file_path
)))
849 output_file
.write('// DO NOT EDIT BY HAND!!!\n\n')
850 output_file
.write(output_str
)
854 if __name__
== '__main__':