2 # preprocess source file
4 # Copyright (c) 2007 - 2018, Intel Corporation. All rights reserved.<BR>
6 # This program and the accompanying materials
7 # are licensed and made available under the terms and conditions of the BSD License
8 # which accompanies this distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
19 from __future__
import print_function
20 from __future__
import absolute_import
22 import Common
.LongFilePathOs
as os
26 from .CLexer
import CLexer
27 from .CParser
import CParser
29 from . import FileProfile
30 from .CodeFragment
import Comment
31 from .CodeFragment
import PP_Directive
32 from .ParserWarning
import Warning
35 ##define T_CHAR_SPACE ' '
36 ##define T_CHAR_NULL '\0'
37 ##define T_CHAR_CR '\r'
38 ##define T_CHAR_TAB '\t'
39 ##define T_CHAR_LF '\n'
40 ##define T_CHAR_SLASH '/'
41 ##define T_CHAR_BACKSLASH '\\'
42 ##define T_CHAR_DOUBLE_QUOTE '\"'
43 ##define T_CHAR_SINGLE_QUOTE '\''
44 ##define T_CHAR_STAR '*'
45 ##define T_CHAR_HASH '#'
# Single characters the scanner compares against.
T_CHAR_SPACE = ' '
T_CHAR_NULL = '\0'
T_CHAR_CR = '\r'
T_CHAR_TAB = '\t'
T_CHAR_LF = '\n'
T_CHAR_SLASH = '/'
T_CHAR_BACKSLASH = '\\'
T_CHAR_DOUBLE_QUOTE = '\"'
T_CHAR_SINGLE_QUOTE = '\''
T_CHAR_STAR = '*'
T_CHAR_HASH = '#'
# ',' was absent from the original constant set; defined here so that a
# lookup of T_CHAR_COMMA resolves instead of raising NameError.
T_CHAR_COMMA = ','

# Separator characters (name kept as spelled; other modules may import it).
SEPERATOR_TUPLE = ('=', '|', ',', '{', '}')

# Comment kinds passed as the last argument of Comment(...).
(T_COMMENT_TWO_SLASH, T_COMMENT_SLASH_STAR) = (0, 1)

# Preprocessor directive kinds.
(T_PP_INCLUDE, T_PP_DEFINE, T_PP_OTHERS) = (0, 1, 2)
57 ## The collector for source code fragments.
59 # PreprocessFile method should be called prior to ParseFile
61 # GetNext*** procedures mean these procedures will get next token first, then make judgement.
62 # Get*** procedures mean these procedures will make judgement on current token only.
class CodeFragmentCollector:
    ## The constructor
    #
    #   Creates the file profile for FileName and places the cursor on the
    #   first character of the first line.
    #
    #   @param  self        The object pointer
    #   @param  FileName    The file to be parsed
    #
    def __init__(self, FileName):
        self.Profile = FileProfile.FileProfile(FileName)
        # Append one extra LF entry after the lines held by the profile.
        self.Profile.FileLinesList.append(T_CHAR_LF)
        self.FileName = FileName
        # Cursor: 1-based line number, 0-based offset inside that line.
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0
        # NOTE(review): the reviewed text had a two-statement gap here; these
        # two attributes are restored on that basis - confirm against the
        # complete file.
        self.TokenReleaceList = []
        self.__Token = ""
        self.__SkippedChars = ""

    ## __EndOfFile() method
    #
    #   Judge current buffer pos is at file end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at file end
    #   @retval False       Current File buffer position is NOT at file end
    #
    def __EndOfFile(self):
        Lines = self.Profile.FileLinesList
        NumberOfLines = len(Lines)
        # With no lines at all the "last line size" degenerates to 0.
        SizeOfLastLine = len(Lines[-1]) if NumberOfLines > 0 else NumberOfLines

        if self.CurrentLineNumber > NumberOfLines:
            return True
        # The final character of the last line already counts as end of file.
        return (self.CurrentLineNumber == NumberOfLines
                and self.CurrentOffsetWithinLine >= SizeOfLastLine - 1)

    ## __EndOfLine() method
    #
    #   Judge current buffer pos is at line end
    #
    #   @param  self        The object pointer
    #   @retval True        Current File buffer position is at line end
    #   @retval False       Current File buffer position is NOT at line end
    #
    def __EndOfLine(self):
        SizeOfCurrentLine = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1])
        return self.CurrentOffsetWithinLine >= SizeOfCurrentLine - 1

    ## Rewind() method
    #
    #   Reset file data buffer to the initial state
    #
    #   NOTE(review): the header line of this method was missing from the
    #   reviewed text; the name Rewind is restored - confirm against callers.
    #
    #   @param  self        The object pointer
    #
    def Rewind(self):
        self.CurrentLineNumber = 1
        self.CurrentOffsetWithinLine = 0

    ## __UndoOneChar() method
    #
    #   Go back one char in the file buffer
    #
    #   @param  self        The object pointer
    #   @retval True        Successfully go back one char
    #   @retval False       Not able to go back one char as file beginning reached
    #
    def __UndoOneChar(self):
        if self.CurrentLineNumber == 1 and self.CurrentOffsetWithinLine == 0:
            return False
        elif self.CurrentOffsetWithinLine == 0:
            # Step back onto the last character of the previous line.
            self.CurrentLineNumber -= 1
            self.CurrentOffsetWithinLine = len(self.__CurrentLine()) - 1
        else:
            self.CurrentOffsetWithinLine -= 1
        return True

    ## __GetOneChar() method
    #
    #   Move forward one char in the file buffer
    #
    #   @param  self        The object pointer
    #
    def __GetOneChar(self):
        LastOffset = len(self.Profile.FileLinesList[self.CurrentLineNumber - 1]) - 1
        if self.CurrentOffsetWithinLine == LastOffset:
            # Leaving the last character of a line: wrap to the next line.
            self.CurrentLineNumber += 1
            self.CurrentOffsetWithinLine = 0
        else:
            self.CurrentOffsetWithinLine += 1

    ## __CurrentChar() method
    #
    #   Get the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Current char
    #
    def __CurrentChar(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine]

    ## __NextChar() method
    #
    #   Get the char one past the char pointed to by the file buffer pointer
    #
    #   @param  self        The object pointer
    #   @retval Char        Next char
    #
    def __NextChar(self):
        Lines = self.Profile.FileLinesList
        if self.CurrentOffsetWithinLine == len(Lines[self.CurrentLineNumber - 1]) - 1:
            # CurrentLineNumber is 1-based, so it already indexes the next line.
            return Lines[self.CurrentLineNumber][0]
        return Lines[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine + 1]

    ## __SetCurrentCharValue() method
    #
    #   Modify the value of current char
    #
    #   @param  self        The object pointer
    #   @param  Value       The new value of current char
    #
    def __SetCurrentCharValue(self, Value):
        self.Profile.FileLinesList[self.CurrentLineNumber - 1][self.CurrentOffsetWithinLine] = Value

    ## __SetCharValue() method
    #
    #   Modify the value of the char at an explicit position
    #
    #   @param  self        The object pointer
    #   @param  Line        1-based number of the line to modify
    #   @param  Offset      0-based offset of the char within that line
    #   @param  Value       The new value of that char
    #
    def __SetCharValue(self, Line, Offset, Value):
        self.Profile.FileLinesList[Line - 1][Offset] = Value

    ## __CurrentLine() method
    #
    #   Get the list that contains current line contents
    #
    #   @param  self        The object pointer
    #   @retval List        current line contents
    #
    def __CurrentLine(self):
        return self.Profile.FileLinesList[self.CurrentLineNumber - 1]

    ## __InsertComma() method
    #
    #   Insert ',' to replace PP
    #
    #   The comma is inserted into line Line at the current offset unless a
    #   ',' or ';' already ends the code part of that line, ends the previous
    #   line, or starts the following line.  Nothing is returned.
    #
    #   @param  self        The object pointer
    #   @param  Line        1-based number of the line to receive the comma
    #
    def __InsertComma(self, Line):
        Lines = self.Profile.FileLinesList
        Separators = (',', ';')

        def LineText(Index):
            # Lines are lists of chars here; str() on them would yield the
            # list repr, which never ends or starts with a separator.
            return "".join(Lines[Index])

        if Lines[Line - 1][0] != T_CHAR_HASH:
            BeforeHashPart = LineText(Line - 1).split(T_CHAR_HASH)[0]
            if BeforeHashPart.rstrip().endswith(Separators):
                return

        if Line - 2 >= 0 and LineText(Line - 2).rstrip().endswith(Separators):
            return

        # The following line may not exist when Line is the last one.
        if Line < len(Lines) and LineText(Line).lstrip().startswith(Separators):
            return

        Lines[Line - 1].insert(self.CurrentOffsetWithinLine, ',')

    ## __ScanFragments() method
    #
    #   Shared scanner behind PreprocessFile and PreprocessFileWithClear.
    #   Walks the buffer char by char, records comments in
    #   FileProfile.CommentList and '#' directives in
    #   FileProfile.PPDirectiveList, then rewinds the cursor.
    #
    #   @param  self            The object pointer
    #   @param  ClearFragments  True to overwrite every collected comment and
    #                           directive char with a space; False to blank
    #                           only directive chars on continuation lines
    #
    def __ScanFragments(self, ClearFragments):
        self.Rewind()
        InComment = False
        DoubleSlashComment = False
        HashComment = False
        PPExtend = False
        CommentObj = None
        PPDirectiveObj = None
        # HashComment in quoted string " " is ignored.
        InString = False
        InCharLiteral = False

        # Work on mutable per-line char lists.
        self.Profile.FileLinesList = [list(s) for s in self.Profile.FileLinesListFromFile]
        while not self.__EndOfFile():
            CurrentChar = self.__CurrentChar()

            if not InComment and CurrentChar == T_CHAR_DOUBLE_QUOTE:
                InString = not InString

            if not InComment and CurrentChar == T_CHAR_SINGLE_QUOTE:
                InCharLiteral = not InCharLiteral

            # meet new line, then no longer in a comment for // and '#'
            if CurrentChar == T_CHAR_LF:
                if HashComment and PPDirectiveObj is not None:
                    # A trailing backslash continues the directive on the next line.
                    if PPDirectiveObj.Content.rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        PPDirectiveObj.Content += T_CHAR_LF
                        PPExtend = True
                    else:
                        PPExtend = False

                EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

                if InComment and DoubleSlashComment:
                    InComment = False
                    DoubleSlashComment = False
                    CommentObj.Content += T_CHAR_LF
                    CommentObj.EndPos = EndLinePos
                    FileProfile.CommentList.append(CommentObj)
                    CommentObj = None
                if InComment and HashComment and not PPExtend:
                    InComment = False
                    HashComment = False
                    PPDirectiveObj.Content += T_CHAR_LF
                    PPDirectiveObj.EndPos = EndLinePos
                    FileProfile.PPDirectiveList.append(PPDirectiveObj)
                    PPDirectiveObj = None

                if InString or InCharLiteral:
                    # Blank the line-continuation backslash of an open literal.
                    CurrentLine = "".join(self.__CurrentLine())
                    if CurrentLine.rstrip(T_CHAR_LF).rstrip(T_CHAR_CR).endswith(T_CHAR_BACKSLASH):
                        SlashIndex = CurrentLine.rindex(T_CHAR_BACKSLASH)
                        self.__SetCharValue(self.CurrentLineNumber, SlashIndex, T_CHAR_SPACE)

                if InComment and not DoubleSlashComment and not HashComment:
                    CommentObj.Content += T_CHAR_LF
                self.CurrentLineNumber += 1
                self.CurrentOffsetWithinLine = 0
            # check for */ comment end
            elif (InComment and not DoubleSlashComment and not HashComment
                  and CurrentChar == T_CHAR_STAR and self.__NextChar() == T_CHAR_SLASH):
                CommentObj.Content += CurrentChar
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                # EndPos is the position of the closing '/'.
                CommentObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)
                FileProfile.CommentList.append(CommentObj)
                CommentObj = None
                self.__GetOneChar()
                InComment = False
            # inside a comment or directive: collect (and maybe blank) the char
            elif InComment:
                if HashComment:
                    # // follows hash PP directive
                    if CurrentChar == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                        InComment = False
                        HashComment = False
                        PPDirectiveObj.EndPos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine - 1)
                        FileProfile.PPDirectiveList.append(PPDirectiveObj)
                        PPDirectiveObj = None
                        # Re-examine the same '/' as the start of a // comment.
                        continue
                    PPDirectiveObj.Content += CurrentChar
                    if ClearFragments or PPExtend:
                        self.__SetCurrentCharValue(T_CHAR_SPACE)
                else:
                    CommentObj.Content += CurrentChar
                    if ClearFragments:
                        self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
            # check for // comment
            # NOTE(review): unlike the '#' test below, this does not look at
            # InString, so "//" inside a string literal starts a comment.
            elif CurrentChar == T_CHAR_SLASH and self.__NextChar() == T_CHAR_SLASH:
                InComment = True
                DoubleSlashComment = True
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_TWO_SLASH)
            # check for '#' comment
            elif CurrentChar == T_CHAR_HASH and not InString and not InCharLiteral:
                InComment = True
                HashComment = True
                PPDirectiveObj = PP_Directive('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None)
            # check for /* comment start
            elif CurrentChar == T_CHAR_SLASH and self.__NextChar() == T_CHAR_STAR:
                CommentObj = Comment('', (self.CurrentLineNumber, self.CurrentOffsetWithinLine), None, T_COMMENT_SLASH_STAR)
                CommentObj.Content += CurrentChar
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                CommentObj.Content += self.__CurrentChar()
                if ClearFragments:
                    self.__SetCurrentCharValue(T_CHAR_SPACE)
                self.__GetOneChar()
                InComment = True
            else:
                self.__GetOneChar()

        # Close a // comment or directive still open when the buffer ends.
        EndLinePos = (self.CurrentLineNumber, self.CurrentOffsetWithinLine)

        if InComment and DoubleSlashComment:
            CommentObj.EndPos = EndLinePos
            FileProfile.CommentList.append(CommentObj)
        if InComment and HashComment and not PPExtend:
            PPDirectiveObj.EndPos = EndLinePos
            FileProfile.PPDirectiveList.append(PPDirectiveObj)

        self.Rewind()

    ## PreprocessFile() method
    #
    #   Preprocess file contents: collect comments and preprocessor
    #   directives.  Comment text is left in the buffer; directive chars are
    #   replaced with spaces only on backslash-continued lines.
    #   In the end, rewind the file buffer pointer to the beginning
    #   BUGBUG: No !include statement processing contained in this procedure
    #   !include statement should be expanded at the same FileLinesList[CurrentLineNumber - 1]
    #
    #   @param  self        The object pointer
    #
    def PreprocessFile(self):
        self.__ScanFragments(False)

    ## PreprocessFileWithClear() method
    #
    #   Same collection as PreprocessFile, but every collected comment and
    #   directive char is also replaced with a space in the buffer.
    #
    #   @param  self        The object pointer
    #
    def PreprocessFileWithClear(self):
        self.__ScanFragments(True)

    ## __ParseProfileBuffer() method
    #
    #   Turn the preprocessed buffer back into strings and run the C parser
    #   over the concatenated text.
    #
    #   @param  self        The object pointer
    #
    def __ParseProfileBuffer(self):
        # restore from ListOfList to ListOfString
        self.Profile.FileLinesList = ["".join(Line) for Line in self.Profile.FileLinesList]
        FileStringContents = "".join(self.Profile.FileLinesList)
        cStream = antlr3.StringStream(FileStringContents)
        lexer = CLexer(cStream)
        tStream = antlr3.CommonTokenStream(lexer)
        parser = CParser(tStream)
        parser.translation_unit()

    ## ParseFile() method
    #
    #   Preprocess the file with PreprocessFile, then parse the buffer.
    #   Exception will be raised if syntax error found
    #
    #   @param  self        The object pointer
    #
    def ParseFile(self):
        self.PreprocessFile()
        self.__ParseProfileBuffer()

    ## ParseFileWithClearedPPDirective() method
    #
    #   Preprocess the file with PreprocessFileWithClear, then parse the buffer.
    #
    #   @param  self        The object pointer
    #
    def ParseFileWithClearedPPDirective(self):
        self.PreprocessFileWithClear()
        self.__ParseProfileBuffer()

    ## CleanFileProfileBuffer() method
    #
    #   Reset every module-level result list of FileProfile to an empty list.
    #
    #   @param  self        The object pointer
    #
    def CleanFileProfileBuffer(self):
        FileProfile.CommentList = []
        FileProfile.PPDirectiveList = []
        FileProfile.PredicateExpressionList = []
        FileProfile.FunctionDefinitionList = []
        FileProfile.VariableDeclarationList = []
        FileProfile.EnumerationDefinitionList = []
        FileProfile.StructUnionDefinitionList = []
        FileProfile.TypedefDefinitionList = []
        FileProfile.FunctionCallingList = []

    ## PrintFragments() method
    #
    #   Print every collected fragment list of FileProfile to stdout, one
    #   banner-delimited section per list.
    #
    #   @param  self        The object pointer
    #
    def PrintFragments(self):
        Rule = '/****************************************/'

        print('################# ' + self.FileName + '#####################')

        print(Rule)
        print('/*************** COMMENTS ***************/')
        print(Rule)
        for comment in FileProfile.CommentList:
            print(str(comment.StartPos) + comment.Content)

        print(Rule)
        print('/********* PREPROCESS DIRECTIVES ********/')
        print(Rule)
        for pp in FileProfile.PPDirectiveList:
            print(str(pp.StartPos) + pp.Content)

        print(Rule)
        print('/********* VARIABLE DECLARATIONS ********/')
        print(Rule)
        for var in FileProfile.VariableDeclarationList:
            print(str(var.StartPos) + var.Modifier + ' ' + var.Declarator)

        print(Rule)
        print('/********* FUNCTION DEFINITIONS *********/')
        print(Rule)
        for func in FileProfile.FunctionDefinitionList:
            print(str(func.StartPos) + func.Modifier + ' ' + func.Declarator + ' ' + str(func.NamePos))

        print(Rule)
        print('/************ ENUMERATIONS **************/')
        print(Rule)
        for enum in FileProfile.EnumerationDefinitionList:
            print(str(enum.StartPos) + enum.Content)

        print(Rule)
        print('/*********** STRUCTS/UNIONS *************/')
        print(Rule)
        for su in FileProfile.StructUnionDefinitionList:
            print(str(su.StartPos) + su.Content)

        print(Rule)
        print('/********* PREDICATE EXPRESSIONS ********/')
        print(Rule)
        for predexp in FileProfile.PredicateExpressionList:
            print(str(predexp.StartPos) + predexp.Content)

        print(Rule)
        print('/************** TYPEDEFS ****************/')
        print(Rule)
        for typedef in FileProfile.TypedefDefinitionList:
            print(str(typedef.StartPos) + typedef.ToType)
if __name__ == "__main__":
    # Script entry: preprocess the file named by the first command-line
    # argument so its comments and directives are collected.
    collector = CodeFragmentCollector(sys.argv[1])
    collector.PreprocessFile()