3 # Copyright 2008, Google Inc.
6 # Redistribution and use in source and binary forms, with or without
7 # modification, are permitted provided that the following conditions are
10 # * Redistributions of source code must retain the above copyright
11 # notice, this list of conditions and the following disclaimer.
12 # * Redistributions in binary form must reproduce the above
13 # copyright notice, this list of conditions and the following disclaimer
14 # in the documentation and/or other materials provided with the
16 # * Neither the name of Google Inc. nor the names of its
17 # contributors may be used to endorse or promote products derived from
18 # this software without specific prior written permission.
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 """pump v0.2.0 - Pretty Useful for Meta Programming.
34 A tool for preprocessor meta programming. Useful for generating
35 repetitive boilerplate code. Especially useful for writing C++
36 classes, functions, macros, and templates that need to work with
37 various number of arguments.
44 Converts foo.cc.pump to foo.cc.
48 ATOMIC_CODE ::= $var ID = EXPRESSION
49 | $var ID = [[ CODE ]]
50 | $range ID EXPRESSION..EXPRESSION
51 | $for ID SEPARATOR [[ CODE ]]
55 | $if EXPRESSION [[ CODE ]] ELSE_BRANCH
58 SEPARATOR ::= RAW_CODE | EMPTY
59 ELSE_BRANCH ::= $else [[ CODE ]]
60 | $elif EXPRESSION [[ CODE ]] ELSE_BRANCH
62 EXPRESSION has Python syntax.
65 from __future__
import print_function
# Lexer table for Pump source: (compiled regex, token type) pairs.
#
# Order is significant.  FindFirstInLine keeps the earliest-starting match
# and, when two patterns start at the same column, the entry listed first
# wins.  The keyword patterns and '$id' / '$($)' must therefore precede the
# bare '$' pattern, which would otherwise shadow all of them.
TOKEN_TABLE = [
    (re.compile(r'\$var\s+'), '$var'),
    (re.compile(r'\$elif\s+'), '$elif'),
    (re.compile(r'\$else\s+'), '$else'),
    (re.compile(r'\$for\s+'), '$for'),
    (re.compile(r'\$if\s+'), '$if'),
    (re.compile(r'\$range\s+'), '$range'),
    (re.compile(r'\$[_A-Za-z]\w*'), '$id'),
    (re.compile(r'\$\(\$\)'), '$($)'),
    (re.compile(r'\$'), '$'),
    # The brackets optionally swallow one newline that directly follows them.
    (re.compile(r'\[\[\n?'), '[['),
    (re.compile(r'\]\]\n?'), ']]'),
]
89 """Represents a position (line and column) in a text file."""
91 def __init__(self
, line
=-1, column
=-1):
95 def __eq__(self
, rhs
):
96 return self
.line
== rhs
.line
and self
.column
== rhs
.column
98 def __ne__(self
, rhs
):
99 return not self
== rhs
101 def __lt__(self
, rhs
):
102 return self
.line
< rhs
.line
or (
103 self
.line
== rhs
.line
and self
.column
< rhs
.column
)
105 def __le__(self
, rhs
):
106 return self
< rhs
or self
== rhs
108 def __gt__(self
, rhs
):
111 def __ge__(self
, rhs
):
118 return '%s(%s)' % (self
.line
+ 1, self
.column
)
120 def __add__(self
, offset
):
121 return Cursor(self
.line
, self
.column
+ offset
)
123 def __sub__(self
, offset
):
124 return Cursor(self
.line
, self
.column
- offset
)
127 """Returns a copy of self."""
129 return Cursor(self
.line
, self
.column
)
# Special cursor to indicate the end-of-file.
def Eof():
    """Returns the special cursor to denote the end-of-file.

    A fresh Cursor(-1, -1) is built on every call, so callers compare
    against it by value (==) rather than by identity.
    """
    return Cursor(-1, -1)
139 """Represents a token in a Pump source file."""
141 def __init__(self
, start
=None, end
=None, value
=None, token_type
=None):
151 self
.token_type
= token_type
154 return 'Token @%s: \'%s\' type=%s' % (
155 self
.start
, self
.value
, self
.token_type
)
158 """Returns a copy of self."""
160 return Token(self
.start
.Clone(), self
.end
.Clone(), self
.value
,
def StartsWith(lines, pos, string):
    """Returns True iff the given position in lines starts with 'string'.

    Args:
      lines: indexable sequence of strings.
      pos: object exposing integer 'line' and 'column' attributes.
      string: the prefix to test for.

    Returns:
      True if the text of lines[pos.line] from pos.column onward begins
      with 'string', False otherwise.
    """
    remainder = lines[pos.line][pos.column:]
    return remainder.startswith(string)
def FindFirstInLine(line, token_table):
    """Finds the leftmost match of any token pattern within a single line.

    Args:
      line: the string to scan.
      token_table: iterable of (compiled regex, token type) pairs.

    Returns:
      A (start column, match length, token type) tuple describing the match
      that begins earliest in 'line', or None when no pattern matches.  When
      several patterns match at the same column, the one listed first in
      token_table is reported.
    """
    best = None
    for (regex, token_type) in token_table:
        m = regex.search(line)
        if not m:
            # This pattern does not occur in the line at all.
            continue
        # Strict '<' keeps the earlier table entry on ties.
        if best is None or m.start() < best[0]:
            best = (m.start(), m.end() - m.start(), token_type)
    return best
def FindFirst(lines, token_table, cursor):
    """Finds the first token-table match in lines at or after cursor.

    Args:
      lines: list of source lines.
      token_table: iterable of (compiled regex, token type) pairs.
      cursor: position to start searching from; only its 'line' and
        'column' attributes are read, and it is not modified.

    Returns:
      The Token built by MakeToken for the first match, or None when no
      pattern occurs between cursor and the end of lines.
    """
    first_line = cursor.line
    first_column = cursor.column
    for (cur_line_number, line) in enumerate(lines[first_line:], first_line):
        on_first_line = cur_line_number == first_line
        if on_first_line:
            # Ignore everything to the left of the cursor.
            line = line[first_column:]
        m = FindFirstInLine(line, token_table)
        if m is None:
            continue
        # We found a regex in line.
        (start_column, length, token_type) = m
        if on_first_line:
            # Convert the column back to be relative to the whole line.
            start_column += first_column
        found_start = Cursor(cur_line_number, start_column)
        found_end = found_start + length
        return MakeToken(lines, found_start, found_end, token_type)
    # We failed to find any pattern in lines.
    return None
def SubString(lines, start, end):
    """Returns the text of lines between two cursors.

    Args:
      lines: list of source lines.
      start: Cursor of the first character to include.
      end: Cursor one past the last character to include; the Eof() cursor
        stands for the end of the last line.

    Returns:
      The concatenated text in [start, end).  An empty string is returned
      when lines is empty or when start is not before end.
    """
    if not lines:
        # Nothing to slice; also avoids indexing lines[-1] below.
        return ''

    if end == Eof():
        end = Cursor(len(lines) - 1, len(lines[-1]))

    if start >= end:
        return ''

    if start.line == end.line:
        return lines[start.line][start.column:end.column]

    # Tail of the first line, all whole lines in between, head of the last.
    pieces = [lines[start.line][start.column:]]
    pieces.extend(lines[start.line + 1:end.line])
    pieces.append(lines[end.line][:end.column])
    return ''.join(pieces)
def StripMetaComments(str):
    """Strip meta comments from each line in the given string.

    A meta comment starts with '$$' and runs to the end of its line.

    Args:
      str: the Pump source text.  (The name shadows the builtin; it is kept
        because it is part of the function's signature.)

    Returns:
      The text with all meta comments removed.
    """
    # First, completely remove a leading line containing nothing but a meta
    # comment, including the trailing \n.  '^' is not in MULTILINE mode, so
    # this pass only applies at the very start of the text.
    without_leading = re.sub(r'^\s*\$\$.*\n', '', str)

    # Then, remove the remaining meta comments together with the whitespace
    # in front of them.  That whitespace may include the preceding newline,
    # which is how comment-only lines elsewhere in the text disappear.
    return re.sub(r'\s*\$\$.*', '', without_leading)
def MakeToken(lines, start, end, token_type):
    """Creates a new instance of Token.

    Args:
      lines: list of source lines the token is taken from.
      start: Cursor where the token begins.
      end: Cursor just past the token.
      token_type: the type tag to store on the token.

    Returns:
      A Token whose value is the text of lines between start and end.
    """
    value = SubString(lines, start, end)
    return Token(start, end, value, token_type)
244 def ParseToken(lines
, pos
, regex
, token_type
):
245 line
= lines
[pos
.line
][pos
.column
:]
246 m
= regex
.search(line
)
247 if m
and not m
.start():
248 return MakeToken(lines
, pos
, pos
+ m
.end(), token_type
)
250 print('ERROR: %s expected at %s.' % (token_type
, pos
))
# Patterns used by the tokenizer to parse the pieces that follow a directive.
ID_REGEX = re.compile(r'[_A-Za-z]\w*')
EQ_REGEX = re.compile(r'=')
# Lazily matches up to (not including) the end of line or a '$$' meta comment.
REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)')
OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*')
WHITE_SPACE_REGEX = re.compile(r'\s')
DOT_DOT_REGEX = re.compile(r'\.\.')
262 def Skip(lines
, pos
, regex
):
263 line
= lines
[pos
.line
][pos
.column
:]
264 m
= re
.search(regex
, line
)
265 if m
and not m
.start():
271 def SkipUntil(lines
, pos
, regex
, token_type
):
272 line
= lines
[pos
.line
][pos
.column
:]
273 m
= re
.search(regex
, line
)
275 return pos
+ m
.start()
277 print ('ERROR: %s expected on line %s after column %s.' %
278 (token_type
, pos
.line
+ 1, pos
.column
))
282 def ParseExpTokenInParens(lines
, pos
):
283 def ParseInParens(pos
):
284 pos
= Skip(lines
, pos
, OPTIONAL_WHITE_SPACES_REGEX
)
285 pos
= Skip(lines
, pos
, r
'\(')
287 pos
= Skip(lines
, pos
, r
'\)')
291 pos
= SkipUntil(lines
, pos
, r
'\(|\)', ')')
292 if SubString(lines
, pos
, pos
+ 1) == '(':
294 pos
= Skip(lines
, pos
, r
'\)')
300 pos
= ParseInParens(pos
)
301 return MakeToken(lines
, start
, pos
, 'exp')
def RStripNewLineFromToken(token):
    """Returns the token with one trailing newline removed from its value.

    Args:
      token: a Token; its 'value' must be a string.

    Returns:
      A new Token with the same start, end and type whose value lacks the
      final '\\n', or the original token unchanged when its value does not
      end with a newline.
    """
    if not token.value.endswith('\n'):
        # Nothing to strip; hand the token back so callers never get None.
        return token
    return Token(token.start, token.end, token.value[:-1], token.token_type)
311 def TokenizeLines(lines
, pos
):
313 found
= FindFirst(lines
, TOKEN_TABLE
, pos
)
315 yield MakeToken(lines
, pos
, Eof(), 'code')
318 if found
.start
== pos
:
320 prev_token_rstripped
= None
322 prev_token
= MakeToken(lines
, pos
, found
.start
, 'code')
323 prev_token_rstripped
= RStripNewLineFromToken(prev_token
)
325 if found
.token_type
== '$var':
326 if prev_token_rstripped
:
327 yield prev_token_rstripped
329 id_token
= ParseToken(lines
, found
.end
, ID_REGEX
, 'id')
331 pos
= Skip(lines
, id_token
.end
, OPTIONAL_WHITE_SPACES_REGEX
)
333 eq_token
= ParseToken(lines
, pos
, EQ_REGEX
, '=')
335 pos
= Skip(lines
, eq_token
.end
, r
'\s*')
337 if SubString(lines
, pos
, pos
+ 2) != '[[':
338 exp_token
= ParseToken(lines
, pos
, REST_OF_LINE_REGEX
, 'exp')
340 pos
= Cursor(exp_token
.end
.line
+ 1, 0)
341 elif found
.token_type
== '$for':
342 if prev_token_rstripped
:
343 yield prev_token_rstripped
345 id_token
= ParseToken(lines
, found
.end
, ID_REGEX
, 'id')
347 pos
= Skip(lines
, id_token
.end
, WHITE_SPACE_REGEX
)
348 elif found
.token_type
== '$range':
349 if prev_token_rstripped
:
350 yield prev_token_rstripped
352 id_token
= ParseToken(lines
, found
.end
, ID_REGEX
, 'id')
354 pos
= Skip(lines
, id_token
.end
, OPTIONAL_WHITE_SPACES_REGEX
)
356 dots_pos
= SkipUntil(lines
, pos
, DOT_DOT_REGEX
, '..')
357 yield MakeToken(lines
, pos
, dots_pos
, 'exp')
358 yield MakeToken(lines
, dots_pos
, dots_pos
+ 2, '..')
360 new_pos
= Cursor(pos
.line
+ 1, 0)
361 yield MakeToken(lines
, pos
, new_pos
, 'exp')
363 elif found
.token_type
== '$':
367 exp_token
= ParseExpTokenInParens(lines
, found
.end
)
370 elif (found
.token_type
== ']]' or found
.token_type
== '$if' or
371 found
.token_type
== '$elif' or found
.token_type
== '$else'):
372 if prev_token_rstripped
:
373 yield prev_token_rstripped
384 """A generator that yields the tokens in the given string."""
386 lines
= s
.splitlines(True)
387 for token
in TokenizeLines(lines
, Cursor(0, 0)):
392 def __init__(self
, atomic_code_list
=None):
393 self
.atomic_code
= atomic_code_list
397 def __init__(self
, identifier
=None, atomic_code
=None):
398 self
.identifier
= identifier
399 self
.atomic_code
= atomic_code
403 def __init__(self
, identifier
=None, exp1
=None, exp2
=None):
404 self
.identifier
= identifier
410 def __init__(self
, identifier
=None, sep
=None, code
=None):
411 self
.identifier
= identifier
417 def __init__(self
, else_branch
=None):
418 self
.else_branch
= else_branch
422 def __init__(self
, exp
=None, then_branch
=None, else_branch
=None):
424 self
.then_branch
= then_branch
425 self
.else_branch
= else_branch
429 def __init__(self
, token
=None):
430 self
.raw_code
= token
433 class LiteralDollarNode
:
434 def __init__(self
, token
):
439 def __init__(self
, token
, python_exp
):
441 self
.python_exp
= python_exp
444 def PopFront(a_list
):
450 def PushFront(a_list
, elem
):
454 def PopToken(a_list
, token_type
=None):
455 token
= PopFront(a_list
)
456 if token_type
is not None and token
.token_type
!= token_type
:
457 print('ERROR: %s expected at %s' % (token_type
, token
.start
))
458 print('ERROR: %s found instead' % (token
,))
464 def PeekToken(a_list
):
def ParseExpNode(token):
    """Builds an ExpNode from a token holding a meta expression.

    Every identifier-like word in token.value is rewritten into a
    self.GetValue("<word>") call, so the resulting Python expression looks
    its names up through GetValue of whatever object is bound to 'self'
    where the expression is evaluated.

    Args:
      token: the token whose 'value' is the expression text.

    Returns:
      An ExpNode carrying the original token and the rewritten expression.
    """
    python_exp = re.sub(
        r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value)
    return ExpNode(token, python_exp)
476 def ParseElseNode(tokens
):
477 def Pop(token_type
=None):
478 return PopToken(tokens
, token_type
)
480 next
= PeekToken(tokens
)
483 if next
.token_type
== '$else':
486 code_node
= ParseCodeNode(tokens
)
489 elif next
.token_type
== '$elif':
493 code_node
= ParseCodeNode(tokens
)
495 inner_else_node
= ParseElseNode(tokens
)
496 return CodeNode([IfNode(ParseExpNode(exp
), code_node
, inner_else_node
)])
497 elif not next
.value
.strip():
499 return ParseElseNode(tokens
)
504 def ParseAtomicCodeNode(tokens
):
505 def Pop(token_type
=None):
506 return PopToken(tokens
, token_type
)
508 head
= PopFront(tokens
)
511 return RawCodeNode(head
)
515 next
= PeekToken(tokens
)
516 if next
.token_type
== 'exp':
518 return VarNode(id_token
, ParseExpNode(exp_token
))
520 code_node
= ParseCodeNode(tokens
)
522 return VarNode(id_token
, code_node
)
525 next_token
= PeekToken(tokens
)
526 if next_token
.token_type
== 'code':
527 sep_token
= next_token
532 code_node
= ParseCodeNode(tokens
)
534 return ForNode(id_token
, sep_token
, code_node
)
536 exp_token
= Pop('code')
538 code_node
= ParseCodeNode(tokens
)
540 else_node
= ParseElseNode(tokens
)
541 return IfNode(ParseExpNode(exp_token
), code_node
, else_node
)
544 exp1_token
= Pop('exp')
546 exp2_token
= Pop('exp')
547 return RangeNode(id_token
, ParseExpNode(exp1_token
),
548 ParseExpNode(exp2_token
))
550 return ParseExpNode(Token(head
.start
+ 1, head
.end
, head
.value
[1:], 'id'))
552 return LiteralDollarNode(head
)
554 exp_token
= Pop('exp')
555 return ParseExpNode(exp_token
)
557 code_node
= ParseCodeNode(tokens
)
561 PushFront(tokens
, head
)
565 def ParseCodeNode(tokens
):
566 atomic_code_list
= []
570 atomic_code_node
= ParseAtomicCodeNode(tokens
)
572 atomic_code_list
.append(atomic_code_node
)
575 return CodeNode(atomic_code_list
)
def ParseToAST(pump_src_text):
    """Convert the given Pump source text into an AST.

    Args:
      pump_src_text: the Pump source as a single string.

    Returns:
      The node produced by ParseCodeNode for the whole token stream.
    """
    # Materialize the token generator: ParseCodeNode receives a list.
    tokens = list(Tokenize(pump_src_text))
    code_node = ParseCodeNode(tokens)
    return code_node
592 clone
.variables
= self
.variables
[:]
593 clone
.ranges
= self
.ranges
[:]
596 def PushVariable(self
, var
, value
):
597 # If value looks like an int, store it as an int.
599 int_value
= int(value
)
600 if ('%s' % int_value
) == value
:
604 self
.variables
[:0] = [(var
, value
)]
606 def PopVariable(self
):
607 self
.variables
[:1] = []
609 def PushRange(self
, var
, lower
, upper
):
610 self
.ranges
[:0] = [(var
, lower
, upper
)]
615 def GetValue(self
, identifier
):
616 for (var
, value
) in self
.variables
:
617 if identifier
== var
:
620 print('ERROR: meta variable %s is undefined.' % (identifier
,))
623 def EvalExp(self
, exp
):
625 result
= eval(exp
.python_exp
)
626 except Exception as e
: # pylint: disable=broad-except
627 print('ERROR: caught exception %s: %s' % (e
.__class
__.__name
__, e
))
628 print('ERROR: failed to evaluate meta expression %s at %s' %
629 (exp
.python_exp
, exp
.token
.start
))
633 def GetRange(self
, identifier
):
634 for (var
, lower
, upper
) in self
.ranges
:
635 if identifier
== var
:
636 return (lower
, upper
)
638 print('ERROR: range %s is undefined.' % (identifier
,))
646 def GetLastLine(self
):
647 index
= self
.string
.rfind('\n')
651 return self
.string
[index
+ 1:]
657 def RunAtomicCode(env
, node
, output
):
658 if isinstance(node
, VarNode
):
659 identifier
= node
.identifier
.value
.strip()
661 RunAtomicCode(env
.Clone(), node
.atomic_code
, result
)
662 value
= result
.string
663 env
.PushVariable(identifier
, value
)
664 elif isinstance(node
, RangeNode
):
665 identifier
= node
.identifier
.value
.strip()
666 lower
= int(env
.EvalExp(node
.exp1
))
667 upper
= int(env
.EvalExp(node
.exp2
))
668 env
.PushRange(identifier
, lower
, upper
)
669 elif isinstance(node
, ForNode
):
670 identifier
= node
.identifier
.value
.strip()
675 (lower
, upper
) = env
.GetRange(identifier
)
676 for i
in range(lower
, upper
+ 1):
677 new_env
= env
.Clone()
678 new_env
.PushVariable(identifier
, i
)
679 RunCode(new_env
, node
.code
, output
)
682 elif isinstance(node
, RawCodeNode
):
683 output
.Append(node
.raw_code
.value
)
684 elif isinstance(node
, IfNode
):
685 cond
= env
.EvalExp(node
.exp
)
687 RunCode(env
.Clone(), node
.then_branch
, output
)
688 elif node
.else_branch
is not None:
689 RunCode(env
.Clone(), node
.else_branch
, output
)
690 elif isinstance(node
, ExpNode
):
691 value
= env
.EvalExp(node
)
692 output
.Append('%s' % (value
,))
693 elif isinstance(node
, LiteralDollarNode
):
695 elif isinstance(node
, CodeNode
):
696 RunCode(env
.Clone(), node
, output
)
def RunCode(env, code_node, output):
    """Runs every atomic code node of code_node in order.

    Args:
      env: the environment passed through to RunAtomicCode.
      code_node: node whose 'atomic_code' attribute is the iterable of
        children to run.
      output: the output sink passed through to RunAtomicCode.
    """
    for child in code_node.atomic_code:
        RunAtomicCode(env, child, output)
def IsSingleLineComment(cur_line):
    """Returns True iff cur_line contains '//' anywhere.

    This is a plain substring test: a '//' inside a string literal counts
    as well.
    """
    return '//' in cur_line
def IsInPreprocessorDirective(prev_lines, cur_line):
    """Tells whether cur_line belongs to a preprocessor directive.

    Args:
      prev_lines: list of the lines that precede cur_line (may be empty).
      cur_line: the line being classified.

    Returns:
      True if cur_line starts a directive (its first non-blank character is
      '#') or continues one (the previous line ends with a backslash);
      False otherwise.
    """
    if cur_line.lstrip().startswith('#'):
        return True
    # bool() keeps the result a real boolean even when prev_lines is empty.
    return bool(prev_lines) and prev_lines[-1].endswith('\\')
718 def WrapComment(line
, output
):
719 loc
= line
.find('//')
720 before_comment
= line
[:loc
].rstrip()
721 if before_comment
== '':
724 output
.append(before_comment
)
725 indent
= len(before_comment
) - len(before_comment
.lstrip())
726 prefix
= indent
*' ' + '// '
727 max_len
= 80 - len(prefix
)
728 comment
= line
[loc
+ 2:].strip()
729 segs
= [seg
for seg
in re
.split(r
'(\w+\W*)', comment
) if seg
!= '']
732 if len((cur_line
+ seg
).rstrip()) < max_len
:
735 if cur_line
.strip() != '':
736 output
.append(prefix
+ cur_line
.rstrip())
737 cur_line
= seg
.lstrip()
738 if cur_line
.strip() != '':
739 output
.append(prefix
+ cur_line
.strip())
742 def WrapCode(line
, line_concat
, output
):
743 indent
= len(line
) - len(line
.lstrip())
744 prefix
= indent
*' ' # Prefix of the current line
745 max_len
= 80 - indent
- len(line_concat
) # Maximum length of the current line
746 new_prefix
= prefix
+ 4*' ' # Prefix of a continuation line
747 new_max_len
= max_len
- 4 # Maximum length of a continuation line
748 # Prefers to wrap a line after a ',' or ';'.
749 segs
= [seg
for seg
in re
.split(r
'([^,;]+[,;]?)', line
.strip()) if seg
!= '']
750 cur_line
= '' # The current line without leading spaces.
752 # If the line is still too long, wrap at a space.
753 while cur_line
== '' and len(seg
.strip()) > max_len
:
755 split_at
= seg
.rfind(' ', 0, max_len
)
756 output
.append(prefix
+ seg
[:split_at
].strip() + line_concat
)
757 seg
= seg
[split_at
+ 1:]
759 max_len
= new_max_len
761 if len((cur_line
+ seg
).rstrip()) < max_len
:
762 cur_line
= (cur_line
+ seg
).lstrip()
764 output
.append(prefix
+ cur_line
.rstrip() + line_concat
)
766 max_len
= new_max_len
767 cur_line
= seg
.lstrip()
768 if cur_line
.strip() != '':
769 output
.append(prefix
+ cur_line
.strip())
def WrapPreprocessorDirective(line, output):
    """Wraps a preprocessor-directive line via WrapCode.

    Each wrapped piece that is followed by a continuation gets ' \\'
    appended, as WrapCode does with its line_concat argument.

    Args:
      line: the line to wrap.
      output: list that receives the wrapped lines.
    """
    WrapCode(line, ' \\', output)
def WrapPlainCode(line, output):
    """Wraps an ordinary code line via WrapCode with no continuation marker.

    Args:
      line: the line to wrap.
      output: list that receives the wrapped lines.
    """
    WrapCode(line, '', output)
def IsMultiLineIWYUPragma(line):
    """Checks whether line contains the opening of a /* IWYU pragma: comment.

    Returns:
      The regex match object (truthy) when '/* IWYU pragma: ' occurs
      anywhere in line, otherwise None.
    """
    return re.search(r'/\* IWYU pragma: ', line)
def IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
    """Checks whether line is one of the kinds that must never be wrapped.

    Three kinds of line qualify: a header guard ('#ifndef NAME',
    '#define NAME' or '#endif // NAME' starting at column 0), an '#include'
    starting at column 0, or any line containing '// IWYU pragma: '.

    Returns:
      The first regex match object found (truthy), or None when the line is
      none of the above.
    """
    header_guard = re.match(
        r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line)
    if header_guard:
        return header_guard
    include = re.match(r'^#include\s', line)
    if include:
        return include
    # Don't break IWYU pragmas, either; that causes iwyu.py problems.
    return re.search(r'// IWYU pragma: ', line)
791 def WrapLongLine(line
, output
):
795 elif IsSingleLineComment(line
):
796 if IsHeaderGuardIncludeOrOneLineIWYUPragma(line
):
797 # The style guide made an exception to allow long header guard lines,
798 # includes and IWYU pragmas.
801 WrapComment(line
, output
)
802 elif IsInPreprocessorDirective(output
, line
):
803 if IsHeaderGuardIncludeOrOneLineIWYUPragma(line
):
804 # The style guide made an exception to allow long header guard lines,
805 # includes and IWYU pragmas.
808 WrapPreprocessorDirective(line
, output
)
809 elif IsMultiLineIWYUPragma(line
):
812 WrapPlainCode(line
, output
)
def BeautifyCode(string):
    """Wraps long lines of the given text and strips trailing whitespace.

    Args:
      string: the generated code as a single string.

    Returns:
      The text after every input line has been passed through WrapLongLine,
      with trailing whitespace removed from each resulting line and exactly
      one '\\n' at the end.
    """
    output = []
    for line in string.splitlines():
        # WrapLongLine appends one or more lines to output.
        WrapLongLine(line, output)
    stripped = [out_line.rstrip() for out_line in output]
    return '\n'.join(stripped) + '\n'
824 def ConvertFromPumpSource(src_text
):
825 """Return the text generated from the given Pump source text."""
826 ast
= ParseToAST(StripMetaComments(src_text
))
828 RunCode(Env(), ast
, output
)
829 return BeautifyCode(output
.string
)
838 output_str
= ConvertFromPumpSource(io
.open(file_path
, 'r').read())
839 if file_path
.endswith('.pump'):
840 output_file_path
= file_path
[:-5]
842 output_file_path
= '-'
843 if output_file_path
== '-':
846 output_file
= io
.open(output_file_path
, 'w')
847 output_file
.write(u
'// This file was GENERATED by command:\n')
848 output_file
.write(u
'// %s %s\n' %
849 (os
.path
.basename(__file__
), os
.path
.basename(file_path
)))
850 output_file
.write(u
'// DO NOT EDIT BY HAND!!!\n\n')
851 output_file
.write(output_str
)
855 if __name__
== '__main__':