2 # preprocess source file
4 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials
7 # are licensed and made available under the terms and conditions of the BSD License
8 # which accompanies this distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
19 from __future__
import print_function
21 import Common
.LongFilePathOs
as os
25 from CLexer
import CLexer
26 from CParser
import CParser
29 from CodeFragment
import Comment
30 from CodeFragment
import PP_Directive
31 from ParserWarning
import Warning
34 ##define T_CHAR_SPACE ' '
35 ##define T_CHAR_NULL '\0'
36 ##define T_CHAR_CR '\r'
37 ##define T_CHAR_TAB '\t'
38 ##define T_CHAR_LF '\n'
39 ##define T_CHAR_SLASH '/'
40 ##define T_CHAR_BACKSLASH '\\'
41 ##define T_CHAR_DOUBLE_QUOTE '\"'
42 ##define T_CHAR_SINGLE_QUOTE '\''
43 ##define T_CHAR_STAR '*'
44 ##define T_CHAR_HASH '#'
# Single-character tokens recognised while scanning C source text.
(T_CHAR_SPACE, T_CHAR_NULL, T_CHAR_CR, T_CHAR_TAB, T_CHAR_LF, T_CHAR_SLASH, \
T_CHAR_BACKSLASH, T_CHAR_DOUBLE_QUOTE, T_CHAR_SINGLE_QUOTE, T_CHAR_STAR, T_CHAR_HASH) = \
(' ', '\0', '\r', '\t', '\n', '/', '\\', '\"', '\'', '*', '#')

# Comma character; defined separately so the tuple above keeps its original shape.
T_CHAR_COMMA = ','

# NOTE: the misspelling is kept on purpose, the name is part of the module interface.
SEPERATOR_TUPLE = ('=', '|', ',', '{', '}')

# Comment kinds passed to Comment(): '//' and '/* */'.
(T_COMMENT_TWO_SLASH, T_COMMENT_SLASH_STAR) = (0, 1)

# Preprocessor directive kinds.
(T_PP_INCLUDE, T_PP_DEFINE, T_PP_OTHERS) = (0, 1, 2)
56 ## The collector for source code fragments.
58 # PreprocessFile method should be called prior to ParseFile
60 # GetNext*** procedures mean these procedures will get next token first, then make judgement.
61 # Get*** procedures mean these procedures will make judgement on current token only.
class CodeFragmentCollector:
    ## The constructor
    #
    #   @param  self        The object pointer
    #   @param  FileName    The file that to be parsed
    #
    def __init__(self, FileName):
        self.Profile = FileProfile.FileProfile(FileName)
        self.Profile.FileLinesList.append(T_CHAR_LF)
        self.FileName = FileName
        # The cursor is a 1-based line number plus a 0-based offset in that line.
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0

        self.__SkippedChars = ""

    ## __EndOfFile() method
    #
    #   Judge current buffer pos is at file end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at file end
    #   @retval False       Current File buffer position is NOT at file end
    #
    def __EndOfFile(self):
        Lines = self.Profile.FileLinesList
        if not Lines:
            # Nothing to scan; without this guard Lines[-1] raises IndexError.
            return True
        NumberOfLines = len(Lines)
        SizeOfLastLine = len(Lines[-1])
        # The last char of the last line already counts as the end.
        if self.CurrentLineNumber == NumberOfLines and self.CurrentOffsetWithinLine >= SizeOfLastLine - 1:
            return True
        if self.CurrentLineNumber > NumberOfLines:
            return True
        return False

    ## __EndOfLine() method
    #
    #   Judge current buffer pos is at line end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at line end
    #   @retval False       Current File buffer position is NOT at line end
    #
    def __EndOfLine(self):
        SizeOfCurrentLine = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1])
        return self.CurrentOffsetWithinLine >= SizeOfCurrentLine - 1

    ## Rewind() method
    #
    #   Reset file data buffer to the initial state
    #
    #   @param  self        The object pointer
    #
    def Rewind(self):
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0

    ## __UndoOneChar() method
    #
    #   Go back one char in the file buffer
    #
    #   @param  self        The object pointer
    #   @retval True        Successfully go back one char
    #   @retval False       Not able to go back one char as file beginning reached
    #
    def __UndoOneChar(self):
        if self.CurrentLineNumber == 1 and self.CurrentOffsetWithinLine == 0:
            return False
        if self.CurrentOffsetWithinLine == 0:
            # Step back onto the last char of the previous line.
            self.CurrentLineNumber -= 1
            self.CurrentOffsetWithinLine = len(self.__CurrentLine()) - 1
        else:
            self.CurrentOffsetWithinLine -= 1
        return True

    ## __GetOneChar() method
    #
    #   Move forward one char in the file buffer
    #
    #   @param  self        The object pointer
    #
    def __GetOneChar(self):
        if self.CurrentOffsetWithinLine == len(self.__CurrentLine()) - 1:
            # Leaving the last char of a line: wrap to the start of the next one.
            self.CurrentLineNumber += 1
            self.CurrentOffsetWithinLine = 0
        else:
            self.CurrentOffsetWithinLine += 1

    ## __CurrentChar() method
    #
    #   Get the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Current char
    #
    def __CurrentChar(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine]

    ## __NextChar() method
    #
    #   Get the one char pass the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Next char
    #
    def __NextChar(self):
        Lines = self.Profile.FileLinesList
        CurrentLine = Lines[self.CurrentLineNumber - 1]
        if self.CurrentOffsetWithinLine == len(CurrentLine) - 1:
            # Look ahead across the line boundary (CurrentLineNumber is 1-based,
            # so it indexes the following line here).
            return Lines[self.CurrentLineNumber][0]
        return CurrentLine[self.CurrentOffsetWithinLine + 1]

    ## __SetCurrentCharValue() method
    #
    #   Modify the value of current char
    #
    #   @param  self        The object pointer
    #   @param  Value       The new value of current char
    #
    def __SetCurrentCharValue(self, Value):
        self.__SetCharValue(self.CurrentLineNumber, self.CurrentOffsetWithinLine, Value)

    ## __SetCharValue() method
    #
    #   Modify the value of the char at the given position
    #
    #   @param  self        The object pointer
    #   @param  Line        1-based line number of the char
    #   @param  Offset      0-based offset of the char within that line
    #   @param  Value       The new value of the char
    #
    def __SetCharValue(self, Line, Offset, Value):
        self.Profile.FileLinesList[Line - 1][Offset] = Value

    ## __CurrentLine() method
    #
    #   Get the list that contains current line contents
    #
    #   @param  self        The object pointer
    #   @retval List        current line contents
    #
    def __CurrentLine(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1]

    ## __InsertComma() method
    #
    #   Insert ',' into the given line at CurrentOffsetWithinLine, unless the
    #   text around it is already delimited by ',' or ';'.
    #
    #   @param  self        The object pointer
    #   @param  Line        1-based number of the line to patch
    #
    def __InsertComma(self, Line):
        Lines = self.Profile.FileLinesList
        Delimiters = (',', ';')

        # Lines are joined before comparing: str() of a list of chars ends with
        # ']' and would never match a delimiter.
        CurrentText = "".join(Lines[Line - 1])
        if CurrentText[0] != T_CHAR_HASH:
            BeforeHashPart = CurrentText.split(T_CHAR_HASH)[0]
            if BeforeHashPart.rstrip().endswith(Delimiters):
                return

        # Previous line already ends with a delimiter.
        if Line - 2 >= 0 and "".join(Lines[Line - 2]).rstrip().endswith(Delimiters):
            return

        # Following line (when there is one) already starts with a delimiter.
        if Line < len(Lines) and "".join(Lines[Line]).lstrip().startswith(Delimiters):
            return

        Lines[Line - 1].insert(self.CurrentOffsetWithinLine, ',')

    ## __ScanFragments() method
    #
    #   Shared worker of PreprocessFile() and PreprocessFileWithClear().
    #   Rebuilds FileLinesList from FileLinesListFromFile as lists of chars,
    #   walks it one char at a time, appends every '//' and '/* */' comment to
    #   FileProfile.CommentList and every '#' directive to
    #   FileProfile.PPDirectiveList, and finally rewinds the cursor.
    #
    #   @param  self        The object pointer
    #   @param  ClearText   False: leave the buffer text alone, except that the
    #                       continuation lines of a backslash-extended directive
    #                       are overwritten with spaces.
    #                       True: overwrite each collected char (line feeds
    #                       excepted) with a space.
    #
    def __ScanFragments(self, ClearText):
        self.Rewind()
        InComment = False
        DoubleSlashComment = False
        HashComment = False
        PPExtend = False
        CommentObj = None
        PPDirectiveObj = None
        # HashComment in quoted string " " is ignored.
        InString = False
        InCharLiteral = False

        self.Profile.FileLinesList = [list(s) for s in self.Profile.FileLinesListFromFile]
        while not self.__EndOfFile():
            Char = self.__CurrentChar()

            if not InComment and Char == T_CHAR_DOUBLE_QUOTE:
                InString = not InString

            if not InComment and Char == T_CHAR_SINGLE_QUOTE:
                InCharLiteral = not InCharLiteral

            # meet new line, then no longer in a comment for // and '#'
            if Char == T_CHAR_LF:
                if HashComment and PPDirectiveObj is not None:
                    # A trailing backslash carries the directive onto the next line.
                    PPExtend = PPDirectiveObj.Content.rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH)
                    if PPExtend:
                        PPDirectiveObj.Content += T_CHAR_LF

                EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

                if InComment and DoubleSlashComment:
                    InComment = False
                    DoubleSlashComment = False
                    CommentObj.Content += T_CHAR_LF
                    CommentObj.EndPos = EndLinePos
                    FileProfile.CommentList.append(CommentObj)
                    CommentObj = None

                if InComment and HashComment and not PPExtend:
                    InComment = False
                    HashComment = False
                    PPDirectiveObj.Content += T_CHAR_LF
                    PPDirectiveObj.EndPos = EndLinePos
                    FileProfile.PPDirectiveList.append(PPDirectiveObj)
                    PPDirectiveObj = None

                if InString or InCharLiteral:
                    # Blank the line-continuation backslash of a literal that
                    # spans lines.
                    CurrentLine = "".join(self.__CurrentLine())
                    if CurrentLine.rstrip(T_CHAR_LF).rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        SlashIndex = CurrentLine.rindex(T_CHAR_BACKSLASH)
                        self.__SetCharValue(self.CurrentLineNumber, SlashIndex, T_CHAR_SPACE)

                if InComment and not DoubleSlashComment and not HashComment:
                    CommentObj.Content += T_CHAR_LF
                self.CurrentLineNumber += 1
                self.CurrentOffsetWithinLine = 0
            # check for */ comment end
            elif InComment and not DoubleSlashComment and not HashComment and Char == T_CHAR_STAR and self.__NextChar() == T_CHAR_SLASH:
                CommentObj.Content += self.__CurrentChar()
                if ClearText:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearText:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                CommentObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
                FileProfile.CommentList.append(CommentObj)
                CommentObj = None
                self.__GetOneChar()
                InComment = False
            # set comments to spaces
            elif InComment:
                if HashComment:
                    # // follows hash PP directive
                    if Char == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                        InComment = False
                        HashComment = False
                        PPDirectiveObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine - 1)
                        FileProfile.PPDirectiveList.append(PPDirectiveObj)
                        PPDirectiveObj = None
                        # Do not advance: the '//' branch must see this char next.
                        continue
                    PPDirectiveObj.Content += Char
                    # NOTE(review): in ClearText mode directive text is blanked
                    # like comment text, which is what the
                    # ParseFileWithClearedPPDirective() caller name suggests -
                    # confirm against the upstream file.
                    if ClearText or PPExtend:
                        self.__SetCurrentCharValue(T_CHAR_SPACE)
                else:
                    CommentObj.Content += Char
                    if ClearText:
                        self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
            # check for // comment
            elif Char == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                # The cursor stays put; the InComment branch consumes the slashes.
                InComment = True
                DoubleSlashComment = True
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_TWO_SLASH)
            # check for '#' comment
            elif Char == T_CHAR_HASH and not InString and not InCharLiteral:
                InComment = True
                HashComment = True
                PPDirectiveObj = PP_Directive('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None)
            # check for /* comment start
            elif Char == T_CHAR_SLASH and self.__NextChar() == T_CHAR_STAR:
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_SLASH_STAR)
                CommentObj.Content += self.__CurrentChar()
                if ClearText:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearText:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                InComment = True
            else:
                self.__GetOneChar()

        # Close whatever fragment is still open when the buffer ends.
        EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

        if InComment and DoubleSlashComment:
            CommentObj.EndPos = EndLinePos
            FileProfile.CommentList.append(CommentObj)
        if InComment and HashComment and not PPExtend:
            PPDirectiveObj.EndPos = EndLinePos
            FileProfile.PPDirectiveList.append(PPDirectiveObj)

        self.Rewind()

    ## PreprocessFile() method
    #
    #   Preprocess file contents, collecting comments and '#' directives while
    #   keeping the comment text in the buffer.
    #   In the end, rewind the file buffer pointer to the beginning
    #   BUGBUG: No !include statement processing contained in this procedure
    #   !include statement should be expanded at the same FileLinesList[CurrentLineNumber - 1]
    #
    #   @param  self        The object pointer
    #
    def PreprocessFile(self):
        self.__ScanFragments(False)

    ## PreprocessFileWithClear() method
    #
    #   Same collection pass as PreprocessFile(), but the collected text is
    #   replaced with spaces in the buffer.
    #
    #   @param  self        The object pointer
    #
    def PreprocessFileWithClear(self):
        self.__ScanFragments(True)

    ## __RunParser() method
    #
    #   Join the preprocessed buffer into one string and feed it through
    #   CLexer / CParser.
    #
    #   @param  self        The object pointer
    #
    def __RunParser(self):
        # restore from ListOfList to ListOfString
        self.Profile.FileLinesList = ["".join(Line) for Line in self.Profile.FileLinesList]
        FileStringContents = "".join(self.Profile.FileLinesList)
        cStream = antlr3.StringStream(FileStringContents)
        lexer = CLexer(cStream)
        tStream = antlr3.CommonTokenStream(lexer)
        parser = CParser(tStream)
        parser.translation_unit()

    ## ParseFile() method
    #
    #   Preprocess the file with PreprocessFile(), then run the C parser on it.
    #   Whatever the lexer or parser raises is passed on to the caller.
    #
    #   @param  self        The object pointer
    #
    def ParseFile(self):
        self.PreprocessFile()
        self.__RunParser()

    ## ParseFileWithClearedPPDirective() method
    #
    #   Preprocess the file with PreprocessFileWithClear(), then run the C
    #   parser on it.
    #
    #   @param  self        The object pointer
    #
    def ParseFileWithClearedPPDirective(self):
        self.PreprocessFileWithClear()
        self.__RunParser()

    ## CleanFileProfileBuffer() method
    #
    #   Rebind every result list of the FileProfile module to a new empty list
    #
    #   @param  self        The object pointer
    #
    def CleanFileProfileBuffer(self):
        for ListName in ('CommentList',
                         'PPDirectiveList',
                         'PredicateExpressionList',
                         'FunctionDefinitionList',
                         'VariableDeclarationList',
                         'EnumerationDefinitionList',
                         'StructUnionDefinitionList',
                         'TypedefDefinitionList',
                         'FunctionCallingList'):
            setattr(FileProfile, ListName, [])

    ## __PrintSection() method
    #
    #   Print a three-line banner followed by one line per fragment
    #
    #   @param  self        The object pointer
    #   @param  Title       The middle banner line
    #   @param  Fragments   The fragments to list
    #   @param  Describe    Callable turning one fragment into its output line
    #
    def __PrintSection(self, Title, Fragments, Describe):
        print('/****************************************/')
        print(Title)
        print('/****************************************/')
        for Fragment in Fragments:
            print(Describe(Fragment))

    ## PrintFragments() method
    #
    #   Dump the file name and every fragment list of FileProfile to stdout
    #
    #   @param  self        The object pointer
    #
    def PrintFragments(self):
        print('################# ' + self.FileName + '#####################')

        self.__PrintSection('/*************** COMMENTS ***************/',
                            FileProfile.CommentList,
                            lambda comment: str(comment.StartPos) + comment.Content)

        self.__PrintSection('/********* PREPROCESS DIRECTIVES ********/',
                            FileProfile.PPDirectiveList,
                            lambda pp: str(pp.StartPos) + pp.Content)

        self.__PrintSection('/********* VARIABLE DECLARATIONS ********/',
                            FileProfile.VariableDeclarationList,
                            lambda var: str(var.StartPos) + var.Modifier + ' ' + var.Declarator)

        self.__PrintSection('/********* FUNCTION DEFINITIONS *********/',
                            FileProfile.FunctionDefinitionList,
                            lambda func: str(func.StartPos) + func.Modifier + ' ' + func.Declarator + ' ' + str(func.NamePos))

        self.__PrintSection('/************ ENUMERATIONS **************/',
                            FileProfile.EnumerationDefinitionList,
                            lambda enum: str(enum.StartPos) + enum.Content)

        self.__PrintSection('/*********** STRUCTS/UNIONS *************/',
                            FileProfile.StructUnionDefinitionList,
                            lambda su: str(su.StartPos) + su.Content)

        self.__PrintSection('/********* PREDICATE EXPRESSIONS ********/',
                            FileProfile.PredicateExpressionList,
                            lambda predexp: str(predexp.StartPos) + predexp.Content)

        self.__PrintSection('/************** TYPEDEFS ****************/',
                            FileProfile.TypedefDefinitionList,
                            lambda typedef: str(typedef.StartPos) + typedef.ToType)
# Manual test entry point: preprocess the file named on the command line.
if __name__ == "__main__":
    # Imported here because only this entry point needs sys.
    import sys

    # Fail with a usage message instead of an IndexError when no file is given.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <source-file>" % sys.argv[0])

    collector = CodeFragmentCollector(sys.argv[1])
    collector.PreprocessFile()