2 # preprocess source file
4 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials
7 # are licensed and made available under the terms and conditions of the BSD License
8 # which accompanies this distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
20 import Common
.LongFilePathOs
as os
24 from CLexer
import CLexer
25 from CParser
import CParser
28 from CodeFragment
import Comment
29 from CodeFragment
import PP_Directive
30 from ParserWarning
import Warning
33 ##define T_CHAR_SPACE ' '
34 ##define T_CHAR_NULL '\0'
35 ##define T_CHAR_CR '\r'
36 ##define T_CHAR_TAB '\t'
37 ##define T_CHAR_LF '\n'
38 ##define T_CHAR_SLASH '/'
39 ##define T_CHAR_BACKSLASH '\\'
40 ##define T_CHAR_DOUBLE_QUOTE '\"'
41 ##define T_CHAR_SINGLE_QUOTE '\''
42 ##define T_CHAR_STAR '*'
43 ##define T_CHAR_HASH '#'
# Single-character tokens recognised by the scanner.
T_CHAR_SPACE = ' '
T_CHAR_NULL = '\0'
T_CHAR_CR = '\r'
T_CHAR_TAB = '\t'
T_CHAR_LF = '\n'
T_CHAR_SLASH = '/'
T_CHAR_BACKSLASH = '\\'
T_CHAR_DOUBLE_QUOTE = '\"'
T_CHAR_SINGLE_QUOTE = '\''
T_CHAR_STAR = '*'
T_CHAR_HASH = '#'
# Referenced by CodeFragmentCollector.__InsertComma; it must exist at module
# level or that method fails with NameError.
T_CHAR_COMMA = ','

# Characters treated as separators (identifier spelling kept for callers).
SEPERATOR_TUPLE = ('=', '|', ',', '{', '}')

# Comment kinds passed to CodeFragment.Comment.
T_COMMENT_TWO_SLASH = 0
T_COMMENT_SLASH_STAR = 1

# Preprocessor directive kinds.
T_PP_INCLUDE = 0
T_PP_DEFINE = 1
T_PP_OTHERS = 2
## The collector for source code fragments.
#
# The PreprocessFile method should be called prior to ParseFile.
#
# GetNext*** procedures fetch the next token first and then make a judgement on it.
# Get*** procedures make a judgement on the current token only.
#
class CodeFragmentCollector:
    """Scan a source file and collect its comments and preprocessor directives.

    The file content lives in self.Profile.FileLinesList and is addressed
    through a cursor made of self.CurrentLineNumber (1-based) and
    self.CurrentOffsetWithinLine (0-based).  Collected fragments are appended
    to the module-level lists of FileProfile.
    """

    ## The constructor
    #
    #   @param  self        The object pointer
    #   @param  FileName    The file to be parsed
    #
    def __init__(self, FileName):
        self.Profile = FileProfile.FileProfile(FileName)
        # Append a trailing line-feed entry to the line buffer.
        self.Profile.FileLinesList.append(T_CHAR_LF)
        self.FileName = FileName
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0

        # Accumulates every character consumed by __SkipWhiteSpace.
        self.__SkippedChars = ""

    ## __SkipWhiteSpace() method
    #
    #   Skip white spaces from current char, return number of chars skipped
    #
    #   @param  self        The object pointer
    #   @retval Count       The number of chars skipped
    #
    def __SkipWhiteSpace(self):
        Count = 0
        while not self.__EndOfFile():
            Char = self.__CurrentChar()
            if Char not in (T_CHAR_NULL, T_CHAR_CR, T_CHAR_LF, T_CHAR_SPACE, T_CHAR_TAB):
                break
            self.__SkippedChars += str(Char)
            self.__GetOneChar()
            Count += 1
        # Returned on every path, including when the buffer ends while skipping.
        return Count

    ## __EndOfFile() method
    #
    #   Judge current buffer pos is at file end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at file end
    #   @retval False       Current File buffer position is NOT at file end
    #
    def __EndOfFile(self):
        NumberOfLines = len(self.Profile.FileLinesList)
        # With an empty buffer the "size" falls back to the line count (0).
        if NumberOfLines > 0:
            SizeOfLastLine = len(self.Profile.FileLinesList[-1])
        else:
            SizeOfLastLine = NumberOfLines

        if self.CurrentLineNumber > NumberOfLines:
            return True
        # The final character of the last line already counts as end of file.
        return (self.CurrentLineNumber == NumberOfLines
                and self.CurrentOffsetWithinLine >= SizeOfLastLine - 1)

    ## __EndOfLine() method
    #
    #   Judge current buffer pos is at line end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at line end
    #   @retval False       Current File buffer position is NOT at line end
    #
    def __EndOfLine(self):
        SizeOfCurrentLine = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1])
        return self.CurrentOffsetWithinLine >= SizeOfCurrentLine - 1

    ## Rewind() method
    #
    #   Reset file data buffer to the initial state
    #
    #   @param  self        The object pointer
    #
    def Rewind(self):
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0

    ## __UndoOneChar() method
    #
    #   Go back one char in the file buffer
    #
    #   @param  self        The object pointer
    #   @retval True        Successfully go back one char
    #   @retval False       Not able to go back one char as file beginning reached
    #
    def __UndoOneChar(self):
        if self.CurrentLineNumber == 1 and self.CurrentOffsetWithinLine == 0:
            return False

        if self.CurrentOffsetWithinLine == 0:
            # Step back onto the last character of the previous line.
            self.CurrentLineNumber -= 1
            self.CurrentOffsetWithinLine = len(self.__CurrentLine()) - 1
        else:
            self.CurrentOffsetWithinLine -= 1
        return True

    ## __GetOneChar() method
    #
    #   Move forward one char in the file buffer
    #
    #   @param  self        The object pointer
    #
    def __GetOneChar(self):
        LastOffset = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1]) - 1
        if self.CurrentOffsetWithinLine == LastOffset:
            # Wrap to the start of the following line.
            self.CurrentLineNumber += 1
            self.CurrentOffsetWithinLine = 0
        else:
            self.CurrentOffsetWithinLine += 1

    ## __CurrentChar() method
    #
    #   Get the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Current char
    #
    def __CurrentChar(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine]

    ## __NextChar() method
    #
    #   Get the char one past the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Next char
    #
    def __NextChar(self):
        Lines = self.Profile.FileLinesList
        if self.CurrentOffsetWithinLine == len(Lines[self.CurrentLineNumber - 1]) - 1:
            # Cursor is on the last char of its line: look at the next line.
            return Lines[self.CurrentLineNumber][0]
        return Lines[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine + 1]

    ## __SetCurrentCharValue() method
    #
    #   Modify the value of current char
    #
    #   @param  self        The object pointer
    #   @param  Value       The new value of current char
    #
    def __SetCurrentCharValue(self, Value):
        self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine] = Value

    ## __SetCharValue() method
    #
    #   Modify the value of the char at an explicit position
    #
    #   @param  self        The object pointer
    #   @param  Line        1-based line number of the char
    #   @param  Offset      0-based offset of the char within that line
    #   @param  Value       The new value of the char
    #
    def __SetCharValue(self, Line, Offset, Value):
        self.Profile.FileLinesList[Line - 1][Offset] = Value

    ## __CurrentLine() method
    #
    #   Get the list that contains current line contents
    #
    #   @param  self        The object pointer
    #   @retval List        current line contents
    #
    def __CurrentLine(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1]

    ## __InsertComma() method
    #
    #   Insert ',' to replace PP
    #
    #   Nothing is inserted when the text around the given line already
    #   supplies a ',' or ';'.  The method returns None in every case.
    #
    #   @param  self        The object pointer
    #   @param  Line        1-based number of the line to insert into
    #
    def __InsertComma(self, Line):
        Lines = self.Profile.FileLinesList
        Separators = (',', ';')

        # NOTE(review): str() of a list-of-chars line yields its repr, so the
        # suffix/prefix checks below only behave as intended for string lines.
        # Kept as in the original; confirm against the callers.
        if Lines[Line - 1][0] != T_CHAR_HASH:
            BeforeHashPart = str(Lines[Line - 1]).split(T_CHAR_HASH)[0]
            # A literal ',' is used: no T_CHAR_COMMA constant is defined.
            if BeforeHashPart.rstrip().endswith(Separators):
                return

        if Line - 2 >= 0 and str(Lines[Line - 2]).rstrip().endswith(Separators):
            return

        if str(Lines[Line]).lstrip().startswith(Separators):
            return

        Lines[Line - 1].insert(self.CurrentOffsetWithinLine, ',')

    ## __Preprocess() method
    #
    #   Shared scanner behind PreprocessFile and PreprocessFileWithClear.
    #   Rebuilds FileLinesList as lists of chars from FileLinesListFromFile,
    #   records every comment in FileProfile.CommentList and every '#'
    #   directive in FileProfile.PPDirectiveList, then rewinds the cursor.
    #
    #   @param  self            The object pointer
    #   @param  ClearFragments  True: overwrite the chars of comments and
    #                           directives with spaces in the buffer.
    #                           False: overwrite only the chars of
    #                           backslash-continued directive lines.
    #
    def __Preprocess(self, ClearFragments):
        self.Rewind()
        InComment = False
        DoubleSlashComment = False
        HashComment = False
        PPExtend = False
        CommentObj = None
        PPDirectiveObj = None
        # HashComment in quoted string " " is ignored.
        InString = False
        InCharLiteral = False

        self.Profile.FileLinesList = [list(s) for s in self.Profile.FileLinesListFromFile]
        while not self.__EndOfFile():
            Char = self.__CurrentChar()

            if not InComment and Char == T_CHAR_DOUBLE_QUOTE:
                InString = not InString

            if not InComment and Char == T_CHAR_SINGLE_QUOTE:
                InCharLiteral = not InCharLiteral

            # meet new line, then no longer in a comment for // and '#'
            if Char == T_CHAR_LF:
                if HashComment and PPDirectiveObj is not None:
                    # A trailing backslash continues the directive on the next line.
                    if PPDirectiveObj.Content.rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        PPDirectiveObj.Content += T_CHAR_LF
                        PPExtend = True
                    else:
                        PPExtend = False

                EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

                if InComment and DoubleSlashComment:
                    InComment = False
                    DoubleSlashComment = False
                    CommentObj.Content += T_CHAR_LF
                    CommentObj.EndPos = EndLinePos
                    FileProfile.CommentList.append(CommentObj)
                    CommentObj = None
                if InComment and HashComment and not PPExtend:
                    InComment = False
                    HashComment = False
                    PPDirectiveObj.Content += T_CHAR_LF
                    PPDirectiveObj.EndPos = EndLinePos
                    FileProfile.PPDirectiveList.append(PPDirectiveObj)
                    PPDirectiveObj = None

                if InString or InCharLiteral:
                    # Blank the line-continuation backslash of a literal that
                    # spans lines.
                    CurrentLine = "".join(self.__CurrentLine())
                    if CurrentLine.rstrip(T_CHAR_LF).rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        SlashIndex = CurrentLine.rindex(T_CHAR_BACKSLASH)
                        self.__SetCharValue(self.CurrentLineNumber, SlashIndex, T_CHAR_SPACE)

                if InComment and not DoubleSlashComment and not HashComment:
                    CommentObj.Content += T_CHAR_LF
                self.CurrentLineNumber += 1
                self.CurrentOffsetWithinLine = 0

            # check for */ comment end
            elif (InComment and not DoubleSlashComment and not HashComment
                    and Char == T_CHAR_STAR and self.__NextChar() == T_CHAR_SLASH):
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                CommentObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
                FileProfile.CommentList.append(CommentObj)
                CommentObj = None
                self.__GetOneChar()
                InComment = False

            # inside a comment or directive: collect the char
            elif InComment:
                if HashComment:
                    # // follows hash PP directive
                    if Char == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                        InComment = False
                        HashComment = False
                        PPDirectiveObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine - 1)
                        FileProfile.PPDirectiveList.append(PPDirectiveObj)
                        PPDirectiveObj = None
                        # Re-examine the same char as the start of a // comment.
                        continue
                    PPDirectiveObj.Content += Char
                    # NOTE(review): in clearing mode directive chars are blanked
                    # as well as comment chars - confirm this scope.
                    Blank = ClearFragments or PPExtend
                else:
                    CommentObj.Content += Char
                    Blank = ClearFragments
                if Blank:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()

            # check for // comment
            elif Char == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                # No advance here: the chars are collected by the branch above.
                InComment = True
                DoubleSlashComment = True
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_TWO_SLASH)

            # check for '#' comment
            elif Char == T_CHAR_HASH and not InString and not InCharLiteral:
                InComment = True
                HashComment = True
                PPDirectiveObj = PP_Directive('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None)

            # check for /* comment start
            elif Char == T_CHAR_SLASH and self.__NextChar() == T_CHAR_STAR:
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_SLASH_STAR)
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                InComment = True

            else:
                self.__GetOneChar()

        # Close whatever single-line fragment is still open at end of file.
        EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

        if InComment and DoubleSlashComment:
            CommentObj.EndPos = EndLinePos
            FileProfile.CommentList.append(CommentObj)
        if InComment and HashComment and not PPExtend:
            PPDirectiveObj.EndPos = EndLinePos
            FileProfile.PPDirectiveList.append(PPDirectiveObj)

        self.Rewind()

    ## PreprocessFile() method
    #
    #   Preprocess file contents and collect comments and PP directives;
    #   comment text is left in the buffer.
    #   In the end, rewind the file buffer pointer to the beginning
    #   BUGBUG: No !include statement processing contained in this procedure
    #   !include statement should be expanded at the same FileLinesList[CurrentLineNumber - 1]
    #
    #   @param  self        The object pointer
    #
    def PreprocessFile(self):
        self.__Preprocess(False)

    ## PreprocessFileWithClear() method
    #
    #   Same scan as PreprocessFile, but the collected fragment chars are
    #   replaced with spaces in the buffer.
    #
    #   @param  self        The object pointer
    #
    def PreprocessFileWithClear(self):
        self.__Preprocess(True)

    ## __ParseBuffer() method
    #
    #   Turn the buffer back into strings and run the C lexer/parser over it.
    #
    #   @param  self        The object pointer
    #
    def __ParseBuffer(self):
        # restore from ListOfList to ListOfString
        self.Profile.FileLinesList = ["".join(Chars) for Chars in self.Profile.FileLinesList]
        FileStringContents = ''.join(self.Profile.FileLinesList)
        cStream = antlr3.StringStream(FileStringContents)
        lexer = CLexer(cStream)
        tStream = antlr3.CommonTokenStream(lexer)
        parser = CParser(tStream)
        parser.translation_unit()

    ## ParseFile() method
    #
    #   Parse the file profile buffer to extract fd, fv ... information
    #   Exception will be raised if syntax error found
    #
    #   @param  self        The object pointer
    #
    def ParseFile(self):
        self.PreprocessFile()
        self.__ParseBuffer()

    ## ParseFileWithClearedPPDirective() method
    #
    #   Like ParseFile, but preprocesses with PreprocessFileWithClear first.
    #
    #   @param  self        The object pointer
    #
    def ParseFileWithClearedPPDirective(self):
        self.PreprocessFileWithClear()
        self.__ParseBuffer()

    ## CleanFileProfileBuffer() method
    #
    #   Reset every module-level fragment list of FileProfile to an empty list.
    #
    #   @param  self        The object pointer
    #
    def CleanFileProfileBuffer(self):
        FileProfile.CommentList = []
        FileProfile.PPDirectiveList = []
        FileProfile.PredicateExpressionList = []
        FileProfile.FunctionDefinitionList = []
        FileProfile.VariableDeclarationList = []
        FileProfile.EnumerationDefinitionList = []
        FileProfile.StructUnionDefinitionList = []
        FileProfile.TypedefDefinitionList = []
        FileProfile.FunctionCallingList = []

    ## PrintFragments() method
    #
    #   Print every collected fragment list to standard output.
    #
    #   @param  self        The object pointer
    #
    def PrintFragments(self):
        # Single-argument print(...) works as a statement on Python 2 and as
        # a function call on Python 3.
        def Banner(Title):
            Rule = '/****************************************/'
            print(Rule)
            print(Title)
            print(Rule)

        print('################# ' + self.FileName + '#####################')

        Banner('/*************** COMMENTS ***************/')
        for comment in FileProfile.CommentList:
            print(str(comment.StartPos) + comment.Content)

        Banner('/********* PREPROCESS DIRECTIVES ********/')
        for pp in FileProfile.PPDirectiveList:
            print(str(pp.StartPos) + pp.Content)

        Banner('/********* VARIABLE DECLARATIONS ********/')
        for var in FileProfile.VariableDeclarationList:
            print(str(var.StartPos) + var.Modifier + ' ' + var.Declarator)

        Banner('/********* FUNCTION DEFINITIONS *********/')
        for func in FileProfile.FunctionDefinitionList:
            print(str(func.StartPos) + func.Modifier + ' ' + func.Declarator + ' ' + str(func.NamePos))

        Banner('/************ ENUMERATIONS **************/')
        for enum in FileProfile.EnumerationDefinitionList:
            print(str(enum.StartPos) + enum.Content)

        Banner('/*********** STRUCTS/UNIONS *************/')
        for su in FileProfile.StructUnionDefinitionList:
            print(str(su.StartPos) + su.Content)

        Banner('/********* PREDICATE EXPRESSIONS ********/')
        for predexp in FileProfile.PredicateExpressionList:
            print(str(predexp.StartPos) + predexp.Content)

        Banner('/************** TYPEDEFS ****************/')
        for typedef in FileProfile.TypedefDefinitionList:
            print(str(typedef.StartPos) + typedef.ToType)

if __name__ == "__main__":
    # Script entry point: preprocess the file named on the command line.
    # Imported here so the guard does not depend on a module-level import.
    import sys

    if len(sys.argv) < 2:
        # Report a missing argument instead of failing with IndexError.
        sys.stderr.write("usage: %s <source-file>\n" % sys.argv[0])
        sys.exit(1)

    collector = CodeFragmentCollector(sys.argv[1])
    collector.PreprocessFile()