2 # preprocess source file
4 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials
7 # are licensed and made available under the terms and conditions of the BSD License
8 # which accompanies this distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
20 import Common
.LongFilePathOs
as os
24 from CLexer
import CLexer
25 from CParser
import CParser
28 from CodeFragment
import Comment
29 from CodeFragment
import PP_Directive
30 from ParserWarning
import Warning
33 ##define T_CHAR_SPACE ' '
34 ##define T_CHAR_NULL '\0'
35 ##define T_CHAR_CR '\r'
36 ##define T_CHAR_TAB '\t'
37 ##define T_CHAR_LF '\n'
38 ##define T_CHAR_SLASH '/'
39 ##define T_CHAR_BACKSLASH '\\'
40 ##define T_CHAR_DOUBLE_QUOTE '\"'
41 ##define T_CHAR_SINGLE_QUOTE '\''
42 ##define T_CHAR_STAR '*'
43 ##define T_CHAR_HASH '#'
# Single-character tokens the collector compares buffer characters against.
T_CHAR_SPACE = ' '
T_CHAR_NULL = '\0'
T_CHAR_CR = '\r'
T_CHAR_TAB = '\t'
T_CHAR_LF = '\n'
T_CHAR_SLASH = '/'
T_CHAR_BACKSLASH = '\\'
T_CHAR_DOUBLE_QUOTE = '\"'
T_CHAR_SINGLE_QUOTE = '\''
T_CHAR_STAR = '*'
T_CHAR_HASH = '#'

SEPERATOR_TUPLE = ('=', '|', ',', '{', '}')

# Comment kinds handed to Comment(): '//' comments and '/* */' comments.
T_COMMENT_TWO_SLASH, T_COMMENT_SLASH_STAR = range(2)

# Preprocessor directive kinds.
T_PP_INCLUDE, T_PP_DEFINE, T_PP_OTHERS = range(3)
## The collector for source code fragments.
#
# The PreprocessFile method should be called before ParseFile.
#
# GetNext*** procedures fetch the next token first and then evaluate it.
# Get*** procedures evaluate the current token only.
#
62 class CodeFragmentCollector
:
65 # @param self The object pointer
66 # @param FileName The file that to be parsed
def __init__(self, FileName):
    """Create a collector whose cursor sits at the start of FileName.

    @param  FileName    The file to be parsed
    """
    Profile = FileProfile.FileProfile(FileName)
    # Add a lone line feed after the lines already held by the profile.
    Profile.FileLinesList.append(T_CHAR_LF)
    self.Profile = Profile
    self.FileName = FileName
    # Cursor: 1-based line number and 0-based offset within that line.
    self.CurrentLineNumber, self.CurrentOffsetWithinLine = 1, 0
    self.__SkippedChars = ""
78 ## __EndOfFile() method
80 # Judge current buffer pos is at file end
82 # @param self The object pointer
83 # @retval True Current File buffer position is at file end
84 # @retval False Current File buffer position is NOT at file end
86 def __EndOfFile(self
):
87 NumberOfLines
= len(self
.Profile
.FileLinesList
)
88 SizeOfLastLine
= NumberOfLines
90 SizeOfLastLine
= len(self
.Profile
.FileLinesList
[-1])
92 if self
.CurrentLineNumber
== NumberOfLines
and self
.CurrentOffsetWithinLine
>= SizeOfLastLine
- 1:
94 elif self
.CurrentLineNumber
> NumberOfLines
:
99 ## __EndOfLine() method
101 # Judge current buffer pos is at line end
103 # @param self The object pointer
104 # @retval True Current File buffer position is at line end
105 # @retval False Current File buffer position is NOT at line end
107 def __EndOfLine(self
):
108 SizeOfCurrentLine
= len(self
.Profile
.FileLinesList
[self
.CurrentLineNumber
- 1])
109 if self
.CurrentOffsetWithinLine
>= SizeOfCurrentLine
- 1:
116 # Reset file data buffer to the initial state
118 # @param self The object pointer
# NOTE(review): the `def` line that should introduce these two statements is
# not present in this extract -- confirm against the original file.
# Put the cursor back on line 1, offset 0.
121 self
.CurrentLineNumber
= 1
122 self
.CurrentOffsetWithinLine
= 0
124 ## __UndoOneChar() method
126 # Go back one char in the file buffer
128 # @param self The object pointer
129 # @retval True Successfully go back one char
130 # @retval False Not able to go back one char as file beginning reached
132 def __UndoOneChar(self
):
134 if self
.CurrentLineNumber
== 1 and self
.CurrentOffsetWithinLine
== 0:
136 elif self
.CurrentOffsetWithinLine
== 0:
137 self
.CurrentLineNumber
-= 1
138 self
.CurrentOffsetWithinLine
= len(self
.__CurrentLine
()) - 1
140 self
.CurrentOffsetWithinLine
-= 1
143 ## __GetOneChar() method
145 # Move forward one char in the file buffer
147 # @param self The object pointer
149 def __GetOneChar(self
):
150 if self
.CurrentOffsetWithinLine
== len(self
.Profile
.FileLinesList
[self
.CurrentLineNumber
- 1]) - 1:
151 self
.CurrentLineNumber
+= 1
152 self
.CurrentOffsetWithinLine
= 0
154 self
.CurrentOffsetWithinLine
+= 1
156 ## __CurrentChar() method
158 # Get the char pointed to by the file buffer pointer
160 # @param self The object pointer
161 # @retval Char Current char
163 def __CurrentChar(self
):
164 CurrentChar
= self
.Profile
.FileLinesList
[self
.CurrentLineNumber
- 1][self
.CurrentOffsetWithinLine
]
165 # if CurrentChar > 255:
166 # raise Warning("Non-Ascii char found At Line %d, offset %d" % (self.CurrentLineNumber, self.CurrentOffsetWithinLine), self.FileName, self.CurrentLineNumber)
169 ## __NextChar() method
171 # Get the one char pass the char pointed to by the file buffer pointer
173 # @param self The object pointer
174 # @retval Char Next char
176 def __NextChar(self
):
177 if self
.CurrentOffsetWithinLine
== len(self
.Profile
.FileLinesList
[self
.CurrentLineNumber
- 1]) - 1:
178 return self
.Profile
.FileLinesList
[self
.CurrentLineNumber
][0]
180 return self
.Profile
.FileLinesList
[self
.CurrentLineNumber
- 1][self
.CurrentOffsetWithinLine
+ 1]
182 ## __SetCurrentCharValue() method
184 # Modify the value of current char
186 # @param self The object pointer
187 # @param Value The new value of current char
189 def __SetCurrentCharValue(self
, Value
):
190 self
.Profile
.FileLinesList
[self
.CurrentLineNumber
- 1][self
.CurrentOffsetWithinLine
] = Value
192 ## __SetCharValue() method
194 # Modify the value of current char
196 # @param self The object pointer
197 # @param Value The new value of current char
199 def __SetCharValue(self
, Line
, Offset
, Value
):
200 self
.Profile
.FileLinesList
[Line
- 1][Offset
] = Value
202 ## __CurrentLine() method
204 # Get the list that contains current line contents
206 # @param self The object pointer
207 # @retval List current line contents
209 def __CurrentLine(self
):
210 return self
.Profile
.FileLinesList
[self
.CurrentLineNumber
- 1]
212 ## __InsertComma() method
214 # Insert ',' to replace PP
216 # @param self The object pointer
217 # @retval List current line contents
def __InsertComma(self, Line):
    """Insert ',' into a line at the current offset, unless a separator is adjacent.

    Nothing is inserted when the text before any '#' on the line, or the
    previous line, already ends with ',' or ';', or when the following line
    starts with one of them. The method returns nothing.

    @param  Line    1-based number of the line that receives the ','
    """
    Lines = self.Profile.FileLinesList
    Separators = (',', ';')

    def Text(Index):
        # A line may be held as a list of characters; join it so the string
        # tests below see the real text rather than the repr of a list.
        return "".join(Lines[Index])

    Current = Lines[Line - 1]
    if Current and Current[0] != T_CHAR_HASH:
        BeforeHashPart = Text(Line - 1).split(T_CHAR_HASH)[0]
        if BeforeHashPart.rstrip().endswith(Separators):
            return

    if Line - 2 >= 0 and Text(Line - 2).rstrip().endswith(Separators):
        return

    # Only look at the following line when there is one.
    if Line < len(Lines) and Text(Line).lstrip().startswith(Separators):
        return

    Current.insert(self.CurrentOffsetWithinLine, ',')
238 ## PreprocessFile() method
240 # Preprocess file contents, replace comments with spaces.
241 # In the end, rewind the file buffer pointer to the beginning
242 # BUGBUG: No !include statement processing contained in this procedure
243 # !include statement should be expanded at the same FileLinesList[CurrentLineNumber - 1]
245 # @param self The object pointer
247 def PreprocessFile(self
):
# Scan the buffer character by character and record each comment and each
# '#' preprocessor directive in FileProfile.CommentList and
# FileProfile.PPDirectiveList.
# In this variant the calls that would overwrite comment characters with
# spaces are commented out; the one active __SetCurrentCharValue call follows
# the accumulation of PP directive content.
# NOTE(review): this extract appears to be missing statements (for example the
# initialisation of InComment, InString, HashComment and PPExtend, and the
# calls that advance the cursor) -- confirm against the original file.
251 DoubleSlashComment
= False
255 PPDirectiveObj
= None
256 # HashComment in quoted string " " is ignored.
258 InCharLiteral
= False
# Work on a mutable copy: every line becomes a list of characters.
260 self
.Profile
.FileLinesList
= [list(s
) for s
in self
.Profile
.FileLinesListFromFile
]
261 while not self
.__EndOfFile
():
# A double quote outside a comment toggles the in-string state.
263 if not InComment
and self
.__CurrentChar
() == T_CHAR_DOUBLE_QUOTE
:
264 InString
= not InString
# A single quote outside a comment toggles the in-char-literal state.
266 if not InComment
and self
.__CurrentChar
() == T_CHAR_SINGLE_QUOTE
:
267 InCharLiteral
= not InCharLiteral
268 # meet new line, then no longer in a comment for // and '#'
269 if self
.__CurrentChar
() == T_CHAR_LF
:
270 if HashComment
and PPDirectiveObj
is not None:
# Directive content ending in a backslash (ignoring trailing CR) gets the
# line feed appended to its content.
271 if PPDirectiveObj
.Content
.rstrip(T_CHAR_CR
).endswith(T_CHAR_BACKSLASH
):
272 PPDirectiveObj
.Content
+= T_CHAR_LF
# Position of this line feed; used below as the end position of a '//'
# comment or '#' directive that this line closes.
277 EndLinePos
= (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
)
279 if InComment
and DoubleSlashComment
:
281 DoubleSlashComment
= False
282 CommentObj
.Content
+= T_CHAR_LF
283 CommentObj
.EndPos
= EndLinePos
284 FileProfile
.CommentList
.append(CommentObj
)
286 if InComment
and HashComment
and not PPExtend
:
289 PPDirectiveObj
.Content
+= T_CHAR_LF
290 PPDirectiveObj
.EndPos
= EndLinePos
291 FileProfile
.PPDirectiveList
.append(PPDirectiveObj
)
292 PPDirectiveObj
= None
# Inside a string or char literal, a line that ends with a backslash has its
# last backslash overwritten with a space.
294 if InString
or InCharLiteral
:
295 CurrentLine
= "".join(self
.__CurrentLine
())
296 if CurrentLine
.rstrip(T_CHAR_LF
).rstrip(T_CHAR_CR
).endswith(T_CHAR_BACKSLASH
):
297 SlashIndex
= CurrentLine
.rindex(T_CHAR_BACKSLASH
)
298 self
.__SetCharValue
(self
.CurrentLineNumber
, SlashIndex
, T_CHAR_SPACE
)
300 if InComment
and not DoubleSlashComment
and not HashComment
:
301 CommentObj
.Content
+= T_CHAR_LF
# Move the cursor to the start of the next line.
302 self
.CurrentLineNumber
+= 1
303 self
.CurrentOffsetWithinLine
= 0
304 # check for */ comment end
305 elif InComment
and not DoubleSlashComment
and not HashComment
and self
.__CurrentChar
() == T_CHAR_STAR
and self
.__NextChar
() == T_CHAR_SLASH
:
306 CommentObj
.Content
+= self
.__CurrentChar
()
307 # self.__SetCurrentCharValue(T_CHAR_SPACE)
309 CommentObj
.Content
+= self
.__CurrentChar
()
310 # self.__SetCurrentCharValue(T_CHAR_SPACE)
311 CommentObj
.EndPos
= (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
)
312 FileProfile
.CommentList
.append(CommentObj
)
316 # set comments to spaces
319 # // follows hash PP directive
320 if self
.__CurrentChar
() == T_CHAR_SLASH
and self
.__NextChar
() == T_CHAR_SLASH
:
# The directive ends one column before the first slash of the '//'.
323 PPDirectiveObj
.EndPos
= (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
- 1)
324 FileProfile
.PPDirectiveList
.append(PPDirectiveObj
)
325 PPDirectiveObj
= None
328 PPDirectiveObj
.Content
+= self
.__CurrentChar
()
330 self
.__SetCurrentCharValue
(T_CHAR_SPACE
)
332 CommentObj
.Content
+= self
.__CurrentChar
()
333 # self.__SetCurrentCharValue(T_CHAR_SPACE)
335 # check for // comment
336 elif self
.__CurrentChar
() == T_CHAR_SLASH
and self
.__NextChar
() == T_CHAR_SLASH
:
338 DoubleSlashComment
= True
339 CommentObj
= Comment('', (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
), None, T_COMMENT_TWO_SLASH
)
340 # check for '#' comment
341 elif self
.__CurrentChar
() == T_CHAR_HASH
and not InString
and not InCharLiteral
:
344 PPDirectiveObj
= PP_Directive('', (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
), None)
345 # check for /* comment start
346 elif self
.__CurrentChar
() == T_CHAR_SLASH
and self
.__NextChar
() == T_CHAR_STAR
:
347 CommentObj
= Comment('', (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
), None, T_COMMENT_SLASH_STAR
)
348 CommentObj
.Content
+= self
.__CurrentChar
()
349 # self.__SetCurrentCharValue( T_CHAR_SPACE)
351 CommentObj
.Content
+= self
.__CurrentChar
()
352 # self.__SetCurrentCharValue( T_CHAR_SPACE)
# Presumably reached once the scan loop has finished (TODO confirm): close a
# '//' comment or '#' directive that is still open at the current position.
358 EndLinePos
= (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
)
360 if InComment
and DoubleSlashComment
:
361 CommentObj
.EndPos
= EndLinePos
362 FileProfile
.CommentList
.append(CommentObj
)
363 if InComment
and HashComment
and not PPExtend
:
364 PPDirectiveObj
.EndPos
= EndLinePos
365 FileProfile
.PPDirectiveList
.append(PPDirectiveObj
)
369 def PreprocessFileWithClear(self
):
# Scan the buffer character by character and record each comment and each
# '#' preprocessor directive in FileProfile.CommentList and
# FileProfile.PPDirectiveList.
# In this variant the characters collected into a comment are overwritten
# with spaces in the buffer, while the overwrite that follows PP directive
# content accumulation is commented out.
# NOTE(review): this extract appears to be missing statements (for example the
# initialisation of InComment, InString, HashComment and PPExtend, and the
# calls that advance the cursor) -- confirm against the original file.
373 DoubleSlashComment
= False
377 PPDirectiveObj
= None
378 # HashComment in quoted string " " is ignored.
380 InCharLiteral
= False
# Work on a mutable copy: every line becomes a list of characters.
382 self
.Profile
.FileLinesList
= [list(s
) for s
in self
.Profile
.FileLinesListFromFile
]
383 while not self
.__EndOfFile
():
# A double quote outside a comment toggles the in-string state.
385 if not InComment
and self
.__CurrentChar
() == T_CHAR_DOUBLE_QUOTE
:
386 InString
= not InString
# A single quote outside a comment toggles the in-char-literal state.
388 if not InComment
and self
.__CurrentChar
() == T_CHAR_SINGLE_QUOTE
:
389 InCharLiteral
= not InCharLiteral
390 # meet new line, then no longer in a comment for // and '#'
391 if self
.__CurrentChar
() == T_CHAR_LF
:
392 if HashComment
and PPDirectiveObj
is not None:
# Directive content ending in a backslash (ignoring trailing CR) gets the
# line feed appended to its content.
393 if PPDirectiveObj
.Content
.rstrip(T_CHAR_CR
).endswith(T_CHAR_BACKSLASH
):
394 PPDirectiveObj
.Content
+= T_CHAR_LF
# Position of this line feed; used below as the end position of a '//'
# comment or '#' directive that this line closes.
399 EndLinePos
= (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
)
401 if InComment
and DoubleSlashComment
:
403 DoubleSlashComment
= False
404 CommentObj
.Content
+= T_CHAR_LF
405 CommentObj
.EndPos
= EndLinePos
406 FileProfile
.CommentList
.append(CommentObj
)
408 if InComment
and HashComment
and not PPExtend
:
411 PPDirectiveObj
.Content
+= T_CHAR_LF
412 PPDirectiveObj
.EndPos
= EndLinePos
413 FileProfile
.PPDirectiveList
.append(PPDirectiveObj
)
414 PPDirectiveObj
= None
# Inside a string or char literal, a line that ends with a backslash has its
# last backslash overwritten with a space.
416 if InString
or InCharLiteral
:
417 CurrentLine
= "".join(self
.__CurrentLine
())
418 if CurrentLine
.rstrip(T_CHAR_LF
).rstrip(T_CHAR_CR
).endswith(T_CHAR_BACKSLASH
):
419 SlashIndex
= CurrentLine
.rindex(T_CHAR_BACKSLASH
)
420 self
.__SetCharValue
(self
.CurrentLineNumber
, SlashIndex
, T_CHAR_SPACE
)
422 if InComment
and not DoubleSlashComment
and not HashComment
:
423 CommentObj
.Content
+= T_CHAR_LF
# Move the cursor to the start of the next line.
424 self
.CurrentLineNumber
+= 1
425 self
.CurrentOffsetWithinLine
= 0
426 # check for */ comment end
427 elif InComment
and not DoubleSlashComment
and not HashComment
and self
.__CurrentChar
() == T_CHAR_STAR
and self
.__NextChar
() == T_CHAR_SLASH
:
# Each char of the closing '*/' is copied into the comment and then blanked.
428 CommentObj
.Content
+= self
.__CurrentChar
()
429 self
.__SetCurrentCharValue
(T_CHAR_SPACE
)
431 CommentObj
.Content
+= self
.__CurrentChar
()
432 self
.__SetCurrentCharValue
(T_CHAR_SPACE
)
433 CommentObj
.EndPos
= (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
)
434 FileProfile
.CommentList
.append(CommentObj
)
438 # set comments to spaces
441 # // follows hash PP directive
442 if self
.__CurrentChar
() == T_CHAR_SLASH
and self
.__NextChar
() == T_CHAR_SLASH
:
# The directive ends one column before the first slash of the '//'.
445 PPDirectiveObj
.EndPos
= (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
- 1)
446 FileProfile
.PPDirectiveList
.append(PPDirectiveObj
)
447 PPDirectiveObj
= None
450 PPDirectiveObj
.Content
+= self
.__CurrentChar
()
452 # self.__SetCurrentCharValue(T_CHAR_SPACE)
454 CommentObj
.Content
+= self
.__CurrentChar
()
455 self
.__SetCurrentCharValue
(T_CHAR_SPACE
)
457 # check for // comment
458 elif self
.__CurrentChar
() == T_CHAR_SLASH
and self
.__NextChar
() == T_CHAR_SLASH
:
460 DoubleSlashComment
= True
461 CommentObj
= Comment('', (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
), None, T_COMMENT_TWO_SLASH
)
462 # check for '#' comment
463 elif self
.__CurrentChar
() == T_CHAR_HASH
and not InString
and not InCharLiteral
:
466 PPDirectiveObj
= PP_Directive('', (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
), None)
467 # check for /* comment start
468 elif self
.__CurrentChar
() == T_CHAR_SLASH
and self
.__NextChar
() == T_CHAR_STAR
:
469 CommentObj
= Comment('', (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
), None, T_COMMENT_SLASH_STAR
)
# Each char of the opening '/*' is copied into the comment and then blanked.
470 CommentObj
.Content
+= self
.__CurrentChar
()
471 self
.__SetCurrentCharValue
( T_CHAR_SPACE
)
473 CommentObj
.Content
+= self
.__CurrentChar
()
474 self
.__SetCurrentCharValue
( T_CHAR_SPACE
)
# Presumably reached once the scan loop has finished (TODO confirm): close a
# '//' comment or '#' directive that is still open at the current position.
480 EndLinePos
= (self
.CurrentLineNumber
, self
.CurrentOffsetWithinLine
)
482 if InComment
and DoubleSlashComment
:
483 CommentObj
.EndPos
= EndLinePos
484 FileProfile
.CommentList
.append(CommentObj
)
485 if InComment
and HashComment
and not PPExtend
:
486 PPDirectiveObj
.EndPos
= EndLinePos
487 FileProfile
.PPDirectiveList
.append(PPDirectiveObj
)
490 ## ParseFile() method
492 # Parse the file profile buffer to extract fd, fv ... information
493 # Exception will be raised if syntax error found
495 # @param self The object pointer
def ParseFile(self):
    """Preprocess the file, then run the C lexer and parser over its text.

    PreprocessFile() is called first; the per-line character lists it leaves
    in self.Profile.FileLinesList are turned back into strings, concatenated,
    and fed through CLexer and CParser, whose translation_unit() rule is run.
    """
    self.PreprocessFile()
    # restore from ListOfList to ListOfString
    self.Profile.FileLinesList = ["".join(Line) for Line in self.Profile.FileLinesList]
    FileStringContents = "".join(self.Profile.FileLinesList)
    cStream = antlr3.StringStream(FileStringContents)
    lexer = CLexer(cStream)
    tStream = antlr3.CommonTokenStream(lexer)
    parser = CParser(tStream)
    parser.translation_unit()
def ParseFileWithClearedPPDirective(self):
    """Preprocess with PreprocessFileWithClear, then lex and parse the text.

    PreprocessFileWithClear() is called first; the per-line character lists
    it leaves in self.Profile.FileLinesList are turned back into strings,
    concatenated, and fed through CLexer and CParser, whose
    translation_unit() rule is run.
    """
    self.PreprocessFileWithClear()
    # restore from ListOfList to ListOfString
    self.Profile.FileLinesList = ["".join(Line) for Line in self.Profile.FileLinesList]
    FileStringContents = "".join(self.Profile.FileLinesList)
    cStream = antlr3.StringStream(FileStringContents)
    lexer = CLexer(cStream)
    tStream = antlr3.CommonTokenStream(lexer)
    parser = CParser(tStream)
    parser.translation_unit()
def CleanFileProfileBuffer(self):
    """Rebind every fragment list on FileProfile to a new empty list."""
    FragmentListNames = (
        'CommentList',
        'PPDirectiveList',
        'PredicateExpressionList',
        'FunctionDefinitionList',
        'VariableDeclarationList',
        'EnumerationDefinitionList',
        'StructUnionDefinitionList',
        'TypedefDefinitionList',
        'FunctionCallingList',
    )
    for ListName in FragmentListNames:
        # Each attribute gets its own list object.
        setattr(FileProfile, ListName, [])
def PrintFragments(self):
    """Print the file name and every fragment list held on FileProfile.

    Each section is a three-line banner followed by one line per fragment,
    starting with the fragment's StartPos. The single-argument parenthesised
    print form is used so the output is the same under the Python 2 print
    statement and the Python 3 print function.
    """
    Bar = '/****************************************/'
    # (banner title, FileProfile attribute, how to render one fragment)
    Sections = (
        ('/*************** COMMENTS ***************/', 'CommentList',
         lambda Item: str(Item.StartPos) + Item.Content),
        ('/********* PREPROCESS DIRECTIVES ********/', 'PPDirectiveList',
         lambda Item: str(Item.StartPos) + Item.Content),
        ('/********* VARIABLE DECLARATIONS ********/', 'VariableDeclarationList',
         lambda Item: str(Item.StartPos) + Item.Modifier + ' ' + Item.Declarator),
        ('/********* FUNCTION DEFINITIONS *********/', 'FunctionDefinitionList',
         lambda Item: str(Item.StartPos) + Item.Modifier + ' ' + Item.Declarator
         + ' ' + str(Item.NamePos)),
        ('/************ ENUMERATIONS **************/', 'EnumerationDefinitionList',
         lambda Item: str(Item.StartPos) + Item.Content),
        ('/*********** STRUCTS/UNIONS *************/', 'StructUnionDefinitionList',
         lambda Item: str(Item.StartPos) + Item.Content),
        ('/********* PREDICATE EXPRESSIONS ********/', 'PredicateExpressionList',
         lambda Item: str(Item.StartPos) + Item.Content),
        ('/************** TYPEDEFS ****************/', 'TypedefDefinitionList',
         lambda Item: str(Item.StartPos) + Item.ToType),
    )

    print('################# ' + self.FileName + '#####################')

    for Title, ListName, Render in Sections:
        print(Bar)
        print(Title)
        print(Bar)
        # Look the list up only when its section is reached.
        for Item in getattr(FileProfile, ListName):
            print(Render(Item))
if __name__ == "__main__":
    # Preprocess the file named by the first command-line argument.
    SourcePath = sys.argv[1]
    collector = CodeFragmentCollector(SourcePath)
    collector.PreprocessFile()