2 # preprocess source file
4 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
6 # SPDX-License-Identifier: BSD-2-Clause-Patent
13 from __future__
import print_function
14 from __future__
import absolute_import
16 import Common
.LongFilePathOs
as os
18 if sys
.version_info
.major
== 3:
19 import antlr4
as antlr
20 from Ecc
.CParser4
.CLexer
import CLexer
21 from Ecc
.CParser4
.CParser
import CParser
23 import antlr3
as antlr
24 antlr
.InputStream
= antlr
.StringStream
25 from Ecc
.CParser3
.CLexer
import CLexer
26 from Ecc
.CParser3
.CParser
import CParser
29 from Ecc
import FileProfile
30 from Ecc
.CodeFragment
import Comment
31 from Ecc
.CodeFragment
import PP_Directive
32 from Ecc
.ParserWarning
import Warning
35 ##define T_CHAR_SPACE ' '
36 ##define T_CHAR_NULL '\0'
37 ##define T_CHAR_CR '\r'
38 ##define T_CHAR_TAB '\t'
39 ##define T_CHAR_LF '\n'
40 ##define T_CHAR_SLASH '/'
41 ##define T_CHAR_BACKSLASH '\\'
42 ##define T_CHAR_DOUBLE_QUOTE '\"'
43 ##define T_CHAR_SINGLE_QUOTE '\''
44 ##define T_CHAR_STAR '*'
45 ##define T_CHAR_HASH '#'
# Single-character tokens recognised by the preprocessing scanner.
T_CHAR_SPACE = ' '
T_CHAR_NULL = '\0'
T_CHAR_CR = '\r'
T_CHAR_TAB = '\t'
T_CHAR_LF = '\n'
T_CHAR_SLASH = '/'
T_CHAR_BACKSLASH = '\\'
T_CHAR_DOUBLE_QUOTE = '\"'
T_CHAR_SINGLE_QUOTE = '\''
T_CHAR_STAR = '*'
T_CHAR_HASH = '#'

SEPERATOR_TUPLE = ('=', '|', ',', '{', '}')

# Comment kinds stored on Comment objects.
T_COMMENT_TWO_SLASH, T_COMMENT_SLASH_STAR = range(2)

# Preprocessor directive kinds.
T_PP_INCLUDE, T_PP_DEFINE, T_PP_OTHERS = range(3)
57 ## The collector for source code fragments.
59 # PreprocessFile method should be called prior to ParseFile
61 # GetNext*** procedures mean these procedures will get next token first, then make judgement.
62 # Get*** procedures mean these procedures will make judgement on current token only.
class CodeFragmentCollector:
    ## The constructor
    #
    #   @param  self        The object pointer
    #   @param  FileName    The file to be parsed
    #
    def __init__(self, FileName):
        self.Profile = FileProfile.FileProfile(FileName)
        self.Profile.FileLinesList.append(T_CHAR_LF)
        self.FileName = FileName
        # The cursor is a 1-based line number plus a 0-based offset in that line.
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0
        self.TokenReleaceList = []
        self.__SkippedChars = ""

    ## __EndOfFile() method
    #
    #   Judge current buffer pos is at file end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at file end
    #   @retval False       Current File buffer position is NOT at file end
    #
    def __EndOfFile(self):
        Lines = self.Profile.FileLinesList
        NumberOfLines = len(Lines)
        if self.CurrentLineNumber > NumberOfLines:
            return True
        if self.CurrentLineNumber < NumberOfLines:
            return False
        # On the last line: the final char of that line counts as end of file.
        # An empty line list has no last line to measure.
        SizeOfLastLine = len(Lines[-1]) if Lines else NumberOfLines
        return self.CurrentOffsetWithinLine >= SizeOfLastLine - 1

    ## __EndOfLine() method
    #
    #   Judge current buffer pos is at line end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at line end
    #   @retval False       Current File buffer position is NOT at line end
    #
    def __EndOfLine(self):
        SizeOfCurrentLine = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1])
        return self.CurrentOffsetWithinLine >= SizeOfCurrentLine - 1

    ## Rewind() method
    #
    #   Reset file data buffer to the initial state
    #
    #   NOTE(review): the method header was missing from the reviewed text; the
    #   name is restored from the "rewind the file buffer pointer" wording used
    #   by PreprocessFile() -- confirm against version control.
    #
    #   @param  self        The object pointer
    #
    def Rewind(self):
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0

    ## __UndoOneChar() method
    #
    #   Go back one char in the file buffer
    #
    #   @param  self        The object pointer
    #   @retval True        Successfully go back one char
    #   @retval False       Not able to go back one char as file beginning reached
    #
    def __UndoOneChar(self):
        if self.CurrentLineNumber == 1 and self.CurrentOffsetWithinLine == 0:
            return False
        elif self.CurrentOffsetWithinLine == 0:
            # Step back onto the last char of the previous line.
            self.CurrentLineNumber -= 1
            self.CurrentOffsetWithinLine = len(self.__CurrentLine()) - 1
        else:
            self.CurrentOffsetWithinLine -= 1
        return True

    ## __GetOneChar() method
    #
    #   Move forward one char in the file buffer
    #
    #   @param  self        The object pointer
    #
    def __GetOneChar(self):
        if self.CurrentOffsetWithinLine == len(self.Profile.FileLinesList[self.CurrentLineNumber - 1]) - 1:
            # Last char of the line: continue at the start of the next line.
            self.CurrentLineNumber += 1
            self.CurrentOffsetWithinLine = 0
        else:
            self.CurrentOffsetWithinLine += 1

    ## __CurrentChar() method
    #
    #   Get the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Current char
    #
    def __CurrentChar(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine]

    ## __NextChar() method
    #
    #   Get the char one past the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Next char
    #
    def __NextChar(self):
        if self.CurrentOffsetWithinLine == len(self.Profile.FileLinesList[self.CurrentLineNumber - 1]) - 1:
            # At the last char of a line the next char is the first of the next line.
            return self.Profile.FileLinesList[self.CurrentLineNumber][0]
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine + 1]

    ## __SetCurrentCharValue() method
    #
    #   Modify the value of current char
    #
    #   @param  self        The object pointer
    #   @param  Value       The new value of current char
    #
    def __SetCurrentCharValue(self, Value):
        self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine] = Value

    ## __SetCharValue() method
    #
    #   Modify the value of the char at the given position
    #
    #   @param  self        The object pointer
    #   @param  Line        The 1-based number of the line holding the char
    #   @param  Offset      The 0-based offset of the char within that line
    #   @param  Value       The new value of the char
    #
    def __SetCharValue(self, Line, Offset, Value):
        self.Profile.FileLinesList[Line - 1][Offset] = Value

    ## __CurrentLine() method
    #
    #   Get the list that contains current line contents
    #
    #   @param  self        The object pointer
    #   @retval List        current line contents
    #
    def __CurrentLine(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1]

    ## __InsertComma() method
    #
    #   Insert ',' to replace PP
    #
    #   Nothing is inserted when the text before '#' on the given line, or the
    #   previous line, already ends with ',' or ';', or when the following
    #   line starts with ',' or ';'.
    #
    #   @param  self        The object pointer
    #   @param  Line        The 1-based number of the line to modify
    #
    def __InsertComma(self, Line):
        Lines = self.Profile.FileLinesList
        # Lines are lists of chars while preprocessing; join them so that the
        # text itself is inspected rather than the repr of a list.
        CurrentText = "".join(Lines[Line - 1])

        if CurrentText[:1] != T_CHAR_HASH:
            BeforeHashPart = CurrentText.split(T_CHAR_HASH)[0]
            if BeforeHashPart.rstrip().endswith((',', ';')):
                return

        if Line - 2 >= 0 and "".join(Lines[Line - 2]).rstrip().endswith((',', ';')):
            return

        # The line being modified may be the last one; then there is no next line.
        if Line < len(Lines) and "".join(Lines[Line]).lstrip().startswith((',', ';')):
            return

        Lines[Line - 1].insert(self.CurrentOffsetWithinLine, ',')

    ## __Preprocess() method
    #
    #   Shared scanner behind PreprocessFile() and PreprocessFileWithClear().
    #   Walks the file char by char, appends every comment to
    #   FileProfile.CommentList and every '#' directive to
    #   FileProfile.PPDirectiveList, then rewinds the file buffer pointer.
    #
    #   NOTE(review): flag initialisation, cursor advances and 'continue' were
    #   missing from the reviewed text and are restored from the control flow;
    #   confirm against version control.
    #
    #   @param  self            The object pointer
    #   @param  ClearFragments  True to overwrite every comment and directive
    #                           char with a space; False to blank only the
    #                           chars of continuation lines of a directive
    #
    def __Preprocess(self, ClearFragments):
        self.Rewind()
        InComment = False
        DoubleSlashComment = False
        HashComment = False
        PPExtend = False
        CommentObj = None
        PPDirectiveObj = None
        # HashComment in quoted string " " is ignored.
        InString = False
        InCharLiteral = False

        # Work on lists of chars so that single chars can be overwritten in place.
        self.Profile.FileLinesList = [list(s) for s in self.Profile.FileLinesListFromFile]
        while not self.__EndOfFile():

            if not InComment and self.__CurrentChar() == T_CHAR_DOUBLE_QUOTE:
                InString = not InString

            if not InComment and self.__CurrentChar() == T_CHAR_SINGLE_QUOTE:
                InCharLiteral = not InCharLiteral

            # meet new line, then no longer in a comment for // and '#'
            if self.__CurrentChar() == T_CHAR_LF:
                if HashComment and PPDirectiveObj is not None:
                    # A trailing backslash continues the directive on the next line.
                    if PPDirectiveObj.Content.rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        PPDirectiveObj.Content += T_CHAR_LF
                        PPExtend = True
                    else:
                        PPExtend = False

                EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

                if InComment and DoubleSlashComment:
                    InComment = False
                    DoubleSlashComment = False
                    CommentObj.Content += T_CHAR_LF
                    CommentObj.EndPos = EndLinePos
                    FileProfile.CommentList.append(CommentObj)
                    CommentObj = None
                if InComment and HashComment and not PPExtend:
                    InComment = False
                    HashComment = False
                    PPDirectiveObj.Content += T_CHAR_LF
                    PPDirectiveObj.EndPos = EndLinePos
                    FileProfile.PPDirectiveList.append(PPDirectiveObj)
                    PPDirectiveObj = None

                if InString or InCharLiteral:
                    # Blank the line-continuation backslash of a literal that
                    # spans lines.
                    CurrentLine = "".join(self.__CurrentLine())
                    if CurrentLine.rstrip(T_CHAR_LF).rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        SlashIndex = CurrentLine.rindex(T_CHAR_BACKSLASH)
                        self.__SetCharValue(self.CurrentLineNumber, SlashIndex, T_CHAR_SPACE)

                if InComment and not DoubleSlashComment and not HashComment:
                    CommentObj.Content += T_CHAR_LF
                self.CurrentLineNumber += 1
                self.CurrentOffsetWithinLine = 0
            # check for */ comment end
            elif InComment and not DoubleSlashComment and not HashComment and self.__CurrentChar() == T_CHAR_STAR and self.__NextChar() == T_CHAR_SLASH:
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                # The end position is that of the closing '/'.
                CommentObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
                FileProfile.CommentList.append(CommentObj)
                CommentObj = None
                self.__GetOneChar()
                InComment = False
            # set comments to spaces
            elif InComment:
                if HashComment:
                    # // follows hash PP directive
                    if self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                        InComment = False
                        HashComment = False
                        PPDirectiveObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine - 1)
                        FileProfile.PPDirectiveList.append(PPDirectiveObj)
                        PPDirectiveObj = None
                        # Re-scan the same char so the '//' comment is picked up.
                        continue
                    PPDirectiveObj.Content += self.__CurrentChar()
                    if ClearFragments or PPExtend:
                        self.__SetCurrentCharValue(T_CHAR_SPACE)
                else:
                    CommentObj.Content += self.__CurrentChar()
                    if ClearFragments:
                        self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
            # check for // comment
            elif self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                # The cursor is not advanced: the comment chars are collected by
                # the 'elif InComment' branch on the following iterations.
                InComment = True
                DoubleSlashComment = True
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_TWO_SLASH)
            # check for '#' comment
            elif self.__CurrentChar() == T_CHAR_HASH and not InString and not InCharLiteral:
                InComment = True
                HashComment = True
                PPDirectiveObj = PP_Directive('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None)
            # check for /* comment start
            elif self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_STAR:
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_SLASH_STAR)
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                InComment = True
            else:
                self.__GetOneChar()

        # Close whatever fragment is still open when the file ends.
        EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

        if InComment and DoubleSlashComment:
            CommentObj.EndPos = EndLinePos
            FileProfile.CommentList.append(CommentObj)
        if InComment and HashComment and not PPExtend:
            PPDirectiveObj.EndPos = EndLinePos
            FileProfile.PPDirectiveList.append(PPDirectiveObj)

        self.Rewind()

    ## PreprocessFile() method
    #
    #   Preprocess file contents and collect comments and PP directives.
    #   Only the continuation lines of a PP directive are replaced with spaces.
    #   In the end, rewind the file buffer pointer to the beginning
    #   BUGBUG: No !include statement processing contained in this procedure
    #   !include statement should be expanded at the same FileLinesList[CurrentLineNumber - 1]
    #
    #   @param  self        The object pointer
    #
    def PreprocessFile(self):
        self.__Preprocess(False)

    ## PreprocessFileWithClear() method
    #
    #   Preprocess file contents, replace comments and PP directives with spaces.
    #   In the end, rewind the file buffer pointer to the beginning
    #
    #   @param  self        The object pointer
    #
    def PreprocessFileWithClear(self):
        self.__Preprocess(True)

    ## __JoinFileLines() method
    #
    #   Restore FileLinesList from ListOfList to ListOfString
    #
    #   @param  self        The object pointer
    #   @retval String      The whole file contents as one string
    #
    def __JoinFileLines(self):
        self.Profile.FileLinesList = ["".join(Line) for Line in self.Profile.FileLinesList]
        return "".join(self.Profile.FileLinesList)

    ## __ParseContents() method
    #
    #   Run the C lexer and parser over the given text
    #
    #   @param  self                The object pointer
    #   @param  FileStringContents  The text to be parsed
    #
    def __ParseContents(self, FileStringContents):
        cStream = antlr.InputStream(FileStringContents)
        lexer = CLexer(cStream)
        tStream = antlr.CommonTokenStream(lexer)
        parser = CParser(tStream)
        parser.translation_unit()

    ## ParseFile() method
    #
    #   Parse the file profile buffer to extract fd, fv ... information
    #   Exception will be raised if syntax error found
    #
    #   @param  self        The object pointer
    #
    def ParseFile(self):
        self.PreprocessFile()
        FileStringContents = self.__JoinFileLines()
        # Every registered token is replaced by a fixed placeholder before parsing.
        for Token in self.TokenReleaceList:
            FileStringContents = FileStringContents.replace(Token, 'TOKENSTRING')
        self.__ParseContents(FileStringContents)

    ## ParseFileWithClearedPPDirective() method
    #
    #   Parse the file profile buffer after comments and PP directives have
    #   been replaced with spaces
    #
    #   @param  self        The object pointer
    #
    def ParseFileWithClearedPPDirective(self):
        self.PreprocessFileWithClear()
        self.__ParseContents(self.__JoinFileLines())

    ## CleanFileProfileBuffer() method
    #
    #   Reset every fragment list held by the FileProfile module to empty
    #
    #   @param  self        The object pointer
    #
    def CleanFileProfileBuffer(self):
        FileProfile.CommentList = []
        FileProfile.PPDirectiveList = []
        FileProfile.PredicateExpressionList = []
        FileProfile.FunctionDefinitionList = []
        FileProfile.VariableDeclarationList = []
        FileProfile.EnumerationDefinitionList = []
        FileProfile.StructUnionDefinitionList = []
        FileProfile.TypedefDefinitionList = []
        FileProfile.FunctionCallingList = []

    ## PrintFragments() method
    #
    #   Print every fragment list held by the FileProfile module, one section
    #   per kind of fragment
    #
    #   @param  self        The object pointer
    #
    def PrintFragments(self):
        # Each section starts with a three-line banner around its title.
        def PrintBanner(Title):
            print('/****************************************/')
            print(Title)
            print('/****************************************/')

        print('################# ' + self.FileName + '#####################')

        PrintBanner('/*************** COMMENTS ***************/')
        for comment in FileProfile.CommentList:
            print(str(comment.StartPos) + comment.Content)

        PrintBanner('/********* PREPROCESS DIRECTIVES ********/')
        for pp in FileProfile.PPDirectiveList:
            print(str(pp.StartPos) + pp.Content)

        PrintBanner('/********* VARIABLE DECLARATIONS ********/')
        for var in FileProfile.VariableDeclarationList:
            print(str(var.StartPos) + var.Modifier + ' '+ var.Declarator)

        PrintBanner('/********* FUNCTION DEFINITIONS *********/')
        for func in FileProfile.FunctionDefinitionList:
            print(str(func.StartPos) + func.Modifier + ' '+ func.Declarator + ' ' + str(func.NamePos))

        PrintBanner('/************ ENUMERATIONS **************/')
        for enum in FileProfile.EnumerationDefinitionList:
            print(str(enum.StartPos) + enum.Content)

        PrintBanner('/*********** STRUCTS/UNIONS *************/')
        for su in FileProfile.StructUnionDefinitionList:
            print(str(su.StartPos) + su.Content)

        PrintBanner('/********* PREDICATE EXPRESSIONS ********/')
        for predexp in FileProfile.PredicateExpressionList:
            print(str(predexp.StartPos) + predexp.Content)

        PrintBanner('/************** TYPEDEFS ****************/')
        for typedef in FileProfile.TypedefDefinitionList:
            print(str(typedef.StartPos) + typedef.ToType)
if __name__ == "__main__":
    # Manual smoke run: preprocess the file named by the first
    # command-line argument.
    SourcePath = sys.argv[1]
    collector = CodeFragmentCollector(SourcePath)
    collector.PreprocessFile()