2 # preprocess source file
4 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials
7 # are licensed and made available under the terms and conditions of the BSD License
8 # which accompanies this distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
19 from __future__
import print_function
20 from __future__
import absolute_import
22 import Common
.LongFilePathOs
as os
24 if sys
.version_info
.major
== 3:
25 import antlr4
as antlr
26 from Ecc
.CParser4
.CLexer
import CLexer
27 from Ecc
.CParser4
.CParser
import CParser
29 import antlr3
as antlr
30 antlr
.InputStream
= antlr
.StringStream
31 from Ecc
.CParser3
.CLexer
import CLexer
32 from Ecc
.CParser3
.CParser
import CParser
35 from Ecc
import FileProfile
36 from Ecc
.CodeFragment
import Comment
37 from Ecc
.CodeFragment
import PP_Directive
38 from Ecc
.ParserWarning
import Warning
41 ##define T_CHAR_SPACE ' '
42 ##define T_CHAR_NULL '\0'
43 ##define T_CHAR_CR '\r'
44 ##define T_CHAR_TAB '\t'
45 ##define T_CHAR_LF '\n'
46 ##define T_CHAR_SLASH '/'
47 ##define T_CHAR_BACKSLASH '\\'
48 ##define T_CHAR_DOUBLE_QUOTE '\"'
49 ##define T_CHAR_SINGLE_QUOTE '\''
50 ##define T_CHAR_STAR '*'
51 ##define T_CHAR_HASH '#'
# Single-character tokens the scanner compares against while walking the
# source text.  T_CHAR_COMMA is part of the set because __InsertComma()
# tests line endings against it.
(T_CHAR_SPACE, T_CHAR_NULL, T_CHAR_CR, T_CHAR_TAB, T_CHAR_LF, T_CHAR_SLASH,
 T_CHAR_BACKSLASH, T_CHAR_DOUBLE_QUOTE, T_CHAR_SINGLE_QUOTE, T_CHAR_STAR,
 T_CHAR_HASH, T_CHAR_COMMA) = \
    (' ', '\0', '\r', '\t', '\n', '/', '\\', '\"', '\'', '*', '#', ',')

# NOTE: the spelling "SEPERATOR" is kept because the name is importable by
# other modules.
SEPERATOR_TUPLE = ('=', '|', ',', '{', '}')

# Comment flavours recorded on Comment objects.
(T_COMMENT_TWO_SLASH, T_COMMENT_SLASH_STAR) = (0, 1)

# Preprocessor directive categories.
(T_PP_INCLUDE, T_PP_DEFINE, T_PP_OTHERS) = (0, 1, 2)
## The collector for source code fragments.
#
# PreprocessFile method should be called prior to ParseFile
#
# GetNext*** procedures mean these procedures will get next token first, then make judgement.
# Get*** procedures mean these procedures will make judgement on current token only.
#
class CodeFragmentCollector:
    ## The constructor
    #
    #   @param  self        The object pointer
    #   @param  FileName    The file to be parsed
    #
    def __init__(self, FileName):
        self.Profile = FileProfile.FileProfile(FileName)
        # Append a lone line-feed entry to the freshly loaded line list.
        self.Profile.FileLinesList.append(T_CHAR_LF)
        self.FileName = FileName
        # Cursor into Profile.FileLinesList: 1-based line, 0-based offset.
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0
        self.TokenReleaceList = []
        self.__Token = ""
        self.__SkippedChars = ""

    ## __EndOfFile() method
    #
    #   Judge current buffer pos is at file end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at file end
    #   @retval False       Current File buffer position is NOT at file end
    #
    def __EndOfFile(self):
        Lines = self.Profile.FileLinesList
        NumberOfLines = len(Lines)
        # With an empty buffer the line count (0) stands in for the size.
        SizeOfLastLine = len(Lines[-1]) if NumberOfLines > 0 else NumberOfLines

        if self.CurrentLineNumber > NumberOfLines:
            return True
        # The final character of the last line already counts as end of file.
        return (self.CurrentLineNumber == NumberOfLines
                and self.CurrentOffsetWithinLine >= SizeOfLastLine - 1)

    ## __EndOfLine() method
    #
    #   Judge current buffer pos is at line end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at line end
    #   @retval False       Current File buffer position is NOT at line end
    #
    def __EndOfLine(self):
        SizeOfCurrentLine = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1])
        return self.CurrentOffsetWithinLine >= SizeOfCurrentLine - 1

    ## Rewind() method
    #
    #   Reset file data buffer to the initial state
    #
    #   @param  self        The object pointer
    #
    def Rewind(self):
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0

    ## __UndoOneChar() method
    #
    #   Go back one char in the file buffer
    #
    #   @param  self        The object pointer
    #   @retval True        Successfully go back one char
    #   @retval False       Not able to go back one char as file beginning reached
    #
    def __UndoOneChar(self):
        if self.CurrentLineNumber == 1 and self.CurrentOffsetWithinLine == 0:
            return False

        if self.CurrentOffsetWithinLine == 0:
            # Step onto the last character of the previous line.
            self.CurrentLineNumber -= 1
            self.CurrentOffsetWithinLine = len(self.__CurrentLine()) - 1
        else:
            self.CurrentOffsetWithinLine -= 1
        return True

    ## __GetOneChar() method
    #
    #   Move forward one char in the file buffer
    #
    #   @param  self        The object pointer
    #
    def __GetOneChar(self):
        LastOffset = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1]) - 1
        if self.CurrentOffsetWithinLine == LastOffset:
            # Wrap to the first character of the following line.
            self.CurrentLineNumber += 1
            self.CurrentOffsetWithinLine = 0
        else:
            self.CurrentOffsetWithinLine += 1

    ## __CurrentChar() method
    #
    #   Get the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Current char
    #
    def __CurrentChar(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine]

    ## __NextChar() method
    #
    #   Get the char one past the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Next char
    #
    def __NextChar(self):
        Lines = self.Profile.FileLinesList
        if self.CurrentOffsetWithinLine == len(Lines[self.CurrentLineNumber - 1]) - 1:
            # CurrentLineNumber is 1-based, so it indexes the following line.
            return Lines[self.CurrentLineNumber][0]
        return Lines[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine + 1]

    ## __SetCurrentCharValue() method
    #
    #   Modify the value of current char
    #
    #   @param  self        The object pointer
    #   @param  Value       The new value of current char
    #
    def __SetCurrentCharValue(self, Value):
        self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine] = Value

    ## __SetCharValue() method
    #
    #   Modify the value of the char at an explicit position
    #
    #   @param  self        The object pointer
    #   @param  Line        1-based number of the line to modify
    #   @param  Offset      0-based offset of the char within that line
    #   @param  Value       The new value of the char
    #
    def __SetCharValue(self, Line, Offset, Value):
        self.Profile.FileLinesList[Line - 1][Offset] = Value

    ## __CurrentLine() method
    #
    #   Get the list that contains current line contents
    #
    #   @param  self        The object pointer
    #   @retval List        current line contents
    #
    def __CurrentLine(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1]

    ## __InsertComma() method
    #
    #   Insert ',' to replace PP
    #
    #   Nothing is inserted when the text before '#' on the line, the line
    #   before it, or the line after it already supplies a ',' or ';'.
    #   The method returns nothing; it edits the line buffer in place.
    #
    #   @param  self        The object pointer
    #   @param  Line        1-based number of the line to receive the comma
    #
    def __InsertComma(self, Line):
        Lines = self.Profile.FileLinesList
        # NOTE(review): str() of a list of chars yields the list's repr, not
        # the joined text, so these endswith/startswith checks only behave as
        # intended when the entries are plain strings -- confirm with callers.
        if Lines[Line - 1][0] != T_CHAR_HASH:
            BeforeHashPart = str(Lines[Line - 1]).split(T_CHAR_HASH)[0]
            # A literal ',' is used here, matching the checks below; the
            # module defines no T_CHAR_COMMA constant.
            if BeforeHashPart.rstrip().endswith(',') or BeforeHashPart.rstrip().endswith(';'):
                return

        if Line - 2 >= 0 and str(Lines[Line - 2]).rstrip().endswith(','):
            return

        if Line - 2 >= 0 and str(Lines[Line - 2]).rstrip().endswith(';'):
            return

        if str(Lines[Line]).lstrip().startswith(',') or str(Lines[Line]).lstrip().startswith(';'):
            return

        Lines[Line - 1].insert(self.CurrentOffsetWithinLine, ',')

    ## __PreprocessBuffer() method
    #
    #   Shared scanner behind PreprocessFile() and PreprocessFileWithClear().
    #   Walks the buffer char by char, recording comments in
    #   FileProfile.CommentList and '#' directives in
    #   FileProfile.PPDirectiveList, then rewinds the buffer pointer.
    #
    #   @param  self            The object pointer
    #   @param  ClearFragments  True:  every comment and directive char is
    #                                  overwritten with a space.
    #                           False: only the chars of continuation lines of
    #                                  a backslash-extended directive are
    #                                  overwritten.
    #
    def __PreprocessBuffer(self, ClearFragments):
        self.Rewind()
        InComment = False
        DoubleSlashComment = False
        HashComment = False
        PPExtend = False
        CommentObj = None
        PPDirectiveObj = None
        # HashComment in quoted string " " is ignored.
        InString = False
        InCharLiteral = False

        self.Profile.FileLinesList = [list(s) for s in self.Profile.FileLinesListFromFile]
        while not self.__EndOfFile():

            if not InComment and self.__CurrentChar() == T_CHAR_DOUBLE_QUOTE:
                InString = not InString

            if not InComment and self.__CurrentChar() == T_CHAR_SINGLE_QUOTE:
                InCharLiteral = not InCharLiteral

            # meet new line, then no longer in a comment for // and '#'
            if self.__CurrentChar() == T_CHAR_LF:
                if HashComment and PPDirectiveObj is not None:
                    # A trailing backslash continues the directive on the next line.
                    if PPDirectiveObj.Content.rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        PPDirectiveObj.Content += T_CHAR_LF
                        PPExtend = True
                    else:
                        PPExtend = False

                EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

                if InComment and DoubleSlashComment:
                    InComment = False
                    DoubleSlashComment = False
                    CommentObj.Content += T_CHAR_LF
                    CommentObj.EndPos = EndLinePos
                    FileProfile.CommentList.append(CommentObj)
                    CommentObj = None
                if InComment and HashComment and not PPExtend:
                    InComment = False
                    HashComment = False
                    PPDirectiveObj.Content += T_CHAR_LF
                    PPDirectiveObj.EndPos = EndLinePos
                    FileProfile.PPDirectiveList.append(PPDirectiveObj)
                    PPDirectiveObj = None

                if InString or InCharLiteral:
                    # Blank the line-continuation backslash of a literal that
                    # runs over the end of the line.
                    CurrentLine = "".join(self.__CurrentLine())
                    if CurrentLine.rstrip(T_CHAR_LF).rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        SlashIndex = CurrentLine.rindex(T_CHAR_BACKSLASH)
                        self.__SetCharValue(self.CurrentLineNumber, SlashIndex, T_CHAR_SPACE)

                if InComment and not DoubleSlashComment and not HashComment:
                    CommentObj.Content += T_CHAR_LF
                self.CurrentLineNumber += 1
                self.CurrentOffsetWithinLine = 0
            # check for */ comment end
            elif InComment and not DoubleSlashComment and not HashComment and self.__CurrentChar() == T_CHAR_STAR and self.__NextChar() == T_CHAR_SLASH:
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                CommentObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
                FileProfile.CommentList.append(CommentObj)
                CommentObj = None
                self.__GetOneChar()
                InComment = False
            # set comments to spaces
            elif InComment:
                if HashComment:
                    # // follows hash PP directive
                    if self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                        InComment = False
                        HashComment = False
                        PPDirectiveObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine - 1)
                        FileProfile.PPDirectiveList.append(PPDirectiveObj)
                        PPDirectiveObj = None
                        # Re-examine the same '/' so it starts a // comment.
                        continue
                    PPDirectiveObj.Content += self.__CurrentChar()
                else:
                    CommentObj.Content += self.__CurrentChar()
                if ClearFragments or (HashComment and PPExtend):
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
            # check for // comment
            elif self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                # The pointer is not advanced: the next pass records the
                # slashes through the in-comment branch.
                InComment = True
                DoubleSlashComment = True
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_TWO_SLASH)
            # check for '#' comment
            elif self.__CurrentChar() == T_CHAR_HASH and not InString and not InCharLiteral:
                InComment = True
                HashComment = True
                PPDirectiveObj = PP_Directive('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None)
            # check for /* comment start
            elif self.__CurrentChar() == T_CHAR_SLASH and self.__NextChar() == T_CHAR_STAR:
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_SLASH_STAR)
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                InComment = True
            else:
                self.__GetOneChar()

        # Flush a // comment or directive still open when the buffer ends.
        EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

        if InComment and DoubleSlashComment:
            CommentObj.EndPos = EndLinePos
            FileProfile.CommentList.append(CommentObj)
        if InComment and HashComment and not PPExtend:
            PPDirectiveObj.EndPos = EndLinePos
            FileProfile.PPDirectiveList.append(PPDirectiveObj)

        self.Rewind()

    ## PreprocessFile() method
    #
    #   Preprocess file contents and collect comments and PP directives.
    #   Comment text is left in the buffer; only continuation lines of
    #   backslash-extended PP directives are replaced with spaces.
    #   In the end, rewind the file buffer pointer to the beginning
    #   BUGBUG: No !include statement processing contained in this procedure
    #   !include statement should be expanded at the same FileLinesList[CurrentLineNumber - 1]
    #
    #   @param  self        The object pointer
    #
    def PreprocessFile(self):
        self.__PreprocessBuffer(False)

    ## PreprocessFileWithClear() method
    #
    #   Same scan as PreprocessFile(), but every comment and PP directive
    #   char is replaced with a space in the buffer.
    #
    #   @param  self        The object pointer
    #
    def PreprocessFileWithClear(self):
        self.__PreprocessBuffer(True)

    ## __ParseCurrentBuffer() method
    #
    #   Join the preprocessed buffer back into text and run the C
    #   lexer/parser over it.
    #
    #   @param  self        The object pointer
    #
    def __ParseCurrentBuffer(self):
        # restore from ListOfList to ListOfString
        self.Profile.FileLinesList = ["".join(Line) for Line in self.Profile.FileLinesList]
        FileStringContents = "".join(self.Profile.FileLinesList)
        cStream = antlr.InputStream(FileStringContents)
        lexer = CLexer(cStream)
        tStream = antlr.CommonTokenStream(lexer)
        parser = CParser(tStream)
        parser.translation_unit()

    ## ParseFile() method
    #
    #   Parse the file profile buffer to extract fd, fv ... information
    #   Exception will be raised if syntax error found
    #
    #   @param  self        The object pointer
    #
    def ParseFile(self):
        self.PreprocessFile()
        self.__ParseCurrentBuffer()

    ## ParseFileWithClearedPPDirective() method
    #
    #   Same as ParseFile(), but preprocesses with PreprocessFileWithClear()
    #   first.
    #
    #   @param  self        The object pointer
    #
    def ParseFileWithClearedPPDirective(self):
        self.PreprocessFileWithClear()
        self.__ParseCurrentBuffer()

    ## CleanFileProfileBuffer() method
    #
    #   Reset every module-level fragment list in FileProfile to empty
    #
    #   @param  self        The object pointer
    #
    def CleanFileProfileBuffer(self):
        FileProfile.CommentList = []
        FileProfile.PPDirectiveList = []
        FileProfile.PredicateExpressionList = []
        FileProfile.FunctionDefinitionList = []
        FileProfile.VariableDeclarationList = []
        FileProfile.EnumerationDefinitionList = []
        FileProfile.StructUnionDefinitionList = []
        FileProfile.TypedefDefinitionList = []
        FileProfile.FunctionCallingList = []

    ## PrintFragments() method
    #
    #   Print every collected fragment list, one banner-delimited section
    #   per fragment kind
    #
    #   @param  self        The object pointer
    #
    def PrintFragments(self):
        Rule = '/****************************************/'
        # (title line, fragment list, formatter for one fragment)
        Sections = (
            ('/*************** COMMENTS ***************/',
             FileProfile.CommentList,
             lambda Item: str(Item.StartPos) + Item.Content),
            ('/********* PREPROCESS DIRECTIVES ********/',
             FileProfile.PPDirectiveList,
             lambda Item: str(Item.StartPos) + Item.Content),
            ('/********* VARIABLE DECLARATIONS ********/',
             FileProfile.VariableDeclarationList,
             lambda Item: str(Item.StartPos) + Item.Modifier + ' ' + Item.Declarator),
            ('/********* FUNCTION DEFINITIONS *********/',
             FileProfile.FunctionDefinitionList,
             lambda Item: str(Item.StartPos) + Item.Modifier + ' ' + Item.Declarator + ' ' + str(Item.NamePos)),
            ('/************ ENUMERATIONS **************/',
             FileProfile.EnumerationDefinitionList,
             lambda Item: str(Item.StartPos) + Item.Content),
            ('/*********** STRUCTS/UNIONS *************/',
             FileProfile.StructUnionDefinitionList,
             lambda Item: str(Item.StartPos) + Item.Content),
            ('/********* PREDICATE EXPRESSIONS ********/',
             FileProfile.PredicateExpressionList,
             lambda Item: str(Item.StartPos) + Item.Content),
            ('/************** TYPEDEFS ****************/',
             FileProfile.TypedefDefinitionList,
             lambda Item: str(Item.StartPos) + Item.ToType),
        )

        print('################# ' + self.FileName + '#####################')

        for Title, Fragments, Describe in Sections:
            print(Rule)
            print(Title)
            print(Rule)
            for Fragment in Fragments:
                print(Describe(Fragment))
if __name__ == "__main__":
    # Manual smoke test: preprocess the C source file named on the command line.
    import sys

    if len(sys.argv) < 2:
        # Report a usage line instead of failing with an IndexError traceback.
        sys.exit("usage: %s <source-file>" % sys.argv[0])

    collector = CodeFragmentCollector(sys.argv[1])
    collector.PreprocessFile()