]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Source/Python/UPT/Library/CommentParsing.py
BaseTools/UPT: Porting UPT Tool from Python2 to Python3
[mirror_edk2.git] / BaseTools / Source / Python / UPT / Library / CommentParsing.py
1 ## @file
2 # This file is used to define comment parsing interface
3 #
4 # Copyright (c) 2011 - 2018, Intel Corporation. All rights reserved.<BR>
5 #
6 # This program and the accompanying materials are licensed and made available
7 # under the terms and conditions of the BSD License which accompanies this
8 # distribution. The full text of the license may be found at
9 # http://opensource.org/licenses/bsd-license.php
10 #
11 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
12 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
13 #
14
15 '''
16 CommentParsing
17 '''
18
19 ##
20 # Import Modules
21 #
22 import re
23
24 from Library.StringUtils import GetSplitValueList
25 from Library.StringUtils import CleanString2
26 from Library.DataType import HEADER_COMMENT_NOT_STARTED
27 from Library.DataType import TAB_COMMENT_SPLIT
28 from Library.DataType import HEADER_COMMENT_LICENSE
29 from Library.DataType import HEADER_COMMENT_ABSTRACT
30 from Library.DataType import HEADER_COMMENT_COPYRIGHT
31 from Library.DataType import HEADER_COMMENT_DESCRIPTION
32 from Library.DataType import TAB_SPACE_SPLIT
33 from Library.DataType import TAB_COMMA_SPLIT
34 from Library.DataType import SUP_MODULE_LIST
35 from Library.DataType import TAB_VALUE_SPLIT
36 from Library.DataType import TAB_PCD_VALIDRANGE
37 from Library.DataType import TAB_PCD_VALIDLIST
38 from Library.DataType import TAB_PCD_EXPRESSION
39 from Library.DataType import TAB_PCD_PROMPT
40 from Library.DataType import TAB_CAPHEX_START
41 from Library.DataType import TAB_HEX_START
42 from Library.DataType import PCD_ERR_CODE_MAX_SIZE
43 from Library.ExpressionValidate import IsValidRangeExpr
44 from Library.ExpressionValidate import IsValidListExpr
45 from Library.ExpressionValidate import IsValidLogicalExpr
46 from Object.POM.CommonObject import TextObject
47 from Object.POM.CommonObject import PcdErrorObject
48 import Logger.Log as Logger
49 from Logger.ToolError import FORMAT_INVALID
50 from Logger.ToolError import FORMAT_NOT_SUPPORTED
51 from Logger import StringTable as ST
52
## ParseHeaderCommentSection
#
# Parse Header comment section lines and extract the Abstract, Description,
# Copyright and License text.
#
# The lines are walked through a small state machine:
#   - before the "@file" (or "@BinaryHeader") tag, non-blank comments are
#     collected as License text;
#   - the first line after the tag is the Abstract (a blank one means there is
#     no Abstract), following lines are the Description;
#   - Copyright lines are collected from the first copyright line on;
#     non-copyright lines located between copyright lines are discarded;
#   - everything after the last copyright line is License text.
#
# @param CommentList:    List of (Comment, LineNumber)
# @param FileName:       FileName of the comment, used when reporting problems
# @param IsBinaryHeader: True to look for the "@BinaryHeader" tag instead of
#                        the "@file" tag
#
# @return: (Abstract, Description, Copyright, License), each one stripped
#
def ParseHeaderCommentSection(CommentList, FileName = None, IsBinaryHeader = False):
    Abstract = ''
    Description = ''
    Copyright = ''
    License = ''
    EndOfLine = "\n"
    if IsBinaryHeader:
        STR_HEADER_COMMENT_START = "@BinaryHeader"
    else:
        STR_HEADER_COMMENT_START = "@file"
    HeaderCommentStage = HEADER_COMMENT_NOT_STARTED

    #
    # first find the last copyright line; Last is its position in CommentList
    # (the entry at position 0 is not examined, so Last stays 0 if none is found)
    #
    Last = 0
    for Index in range(len(CommentList)-1, 0, -1):
        Line = CommentList[Index][0]
        if _IsCopyrightLine(Line):
            Last = Index
            break

    for Index, Item in enumerate(CommentList):
        Line = Item[0]
        LineNo = Item[1]

        if not Line.startswith(TAB_COMMENT_SPLIT) and Line:
            Logger.Error("\nUPT", FORMAT_INVALID, ST.ERR_INVALID_COMMENT_FORMAT, FileName, LineNo)
        Comment = CleanString2(Line)[1]
        Comment = Comment.strip()
        #
        # if there are blank lines between License or Description, keep them as they would be
        # indication of different block; or in the position that Abstract should be, also keep it
        # as it indicates that no abstract
        #
        if not Comment and HeaderCommentStage not in [HEADER_COMMENT_LICENSE, \
                                                      HEADER_COMMENT_DESCRIPTION, HEADER_COMMENT_ABSTRACT]:
            continue

        if HeaderCommentStage == HEADER_COMMENT_NOT_STARTED:
            if Comment.startswith(STR_HEADER_COMMENT_START):
                HeaderCommentStage = HEADER_COMMENT_ABSTRACT
            else:
                License += Comment + EndOfLine
        else:
            if HeaderCommentStage == HEADER_COMMENT_ABSTRACT:
                #
                # in case there is no abstract and description
                #
                if not Comment:
                    HeaderCommentStage = HEADER_COMMENT_DESCRIPTION
                elif _IsCopyrightLine(Comment):
                    Result, ErrMsg = _ValidateCopyright(Comment)
                    ValidateCopyright(Result, ST.WRN_INVALID_COPYRIGHT, FileName, LineNo, ErrMsg)
                    Copyright += Comment + EndOfLine
                    HeaderCommentStage = HEADER_COMMENT_COPYRIGHT
                else:
                    Abstract += Comment + EndOfLine
                    HeaderCommentStage = HEADER_COMMENT_DESCRIPTION
            elif HeaderCommentStage == HEADER_COMMENT_DESCRIPTION:
                #
                # in case there is no description
                #
                if _IsCopyrightLine(Comment):
                    Result, ErrMsg = _ValidateCopyright(Comment)
                    ValidateCopyright(Result, ST.WRN_INVALID_COPYRIGHT, FileName, LineNo, ErrMsg)
                    Copyright += Comment + EndOfLine
                    HeaderCommentStage = HEADER_COMMENT_COPYRIGHT
                else:
                    Description += Comment + EndOfLine
            elif HeaderCommentStage == HEADER_COMMENT_COPYRIGHT:
                if _IsCopyrightLine(Comment):
                    Result, ErrMsg = _ValidateCopyright(Comment)
                    ValidateCopyright(Result, ST.WRN_INVALID_COPYRIGHT, FileName, LineNo, ErrMsg)
                    Copyright += Comment + EndOfLine
                else:
                    #
                    # Contents after copyright line are license, those non-copyright lines in between
                    # copyright line will be discarded.
                    # Last is a position in CommentList, so it has to be compared with the
                    # position of the current entry and not with its file line number: the
                    # two only line up when the header starts on the first line of the file
                    # and no line was dropped from CommentList.
                    #
                    if Index > Last:
                        if License:
                            License += EndOfLine
                        License += Comment + EndOfLine
                        HeaderCommentStage = HEADER_COMMENT_LICENSE
            else:
                #
                # License stage: skip leading blank lines, keep the inner ones
                #
                if not Comment and not License:
                    continue
                License += Comment + EndOfLine

    return Abstract.strip(), Description.strip(), Copyright.strip(), License.strip()
152
153 ## _IsCopyrightLine
154 # check whether current line is copyright line, the criteria is whether there is case insensitive keyword "Copyright"
155 # followed by zero or more white space characters followed by a "(" character
156 #
157 # @param LineContent: the line need to be checked
158 # @return: True if current line is copyright line, False else
159 #
160 def _IsCopyrightLine (LineContent):
161 LineContent = LineContent.upper()
162 Result = False
163
164 ReIsCopyrightRe = re.compile(r"""(^|\s)COPYRIGHT *\(""", re.DOTALL)
165 if ReIsCopyrightRe.search(LineContent):
166 Result = True
167
168 return Result
169
## ParseGenericComment
#
# Join the cleaned text of every generic comment line into one help string
# and wrap it into a TextObject.
#
# @param GenericComment: Generic comment list, element of
#                        (CommentLine, LineNum)
# @param ContainerFile:  Input value for filename of Dec file (not used here)
# @param SkipTag:        When not None, the first occurrence of this tag is
#                        removed from every comment that starts with it
#
# @return: a TextObject holding the help string, or None when GenericComment
#          has no element
#
def ParseGenericComment (GenericComment, ContainerFile=None, SkipTag=None):
    Pieces = []
    for Entry in GenericComment:
        Text = CleanString2(Entry[0])[1]
        if SkipTag is not None and Text.startswith(SkipTag):
            Text = Text.replace(SkipTag, '', 1)
        Pieces.append(Text + '\n')

    HelpStr = ''.join(Pieces)
    if not HelpStr:
        return None

    #
    # Drop the final EOL, except when the whole text is a single EOL or when
    # the text ends with a blank line
    #
    if HelpStr != '\n' and HelpStr.endswith('\n') and not HelpStr.endswith('\n\n'):
        HelpStr = HelpStr[:-1]

    HelpTxt = TextObject()
    HelpTxt.SetString(HelpStr)
    return HelpTxt
196
## ParsePcdErrorCode
#
# Convert a PCD ErrorCode written in decimal or in hexadecimal (prefixed by
# TAB_HEX_START or TAB_CAPHEX_START) into its hex() string form.
#
# A value that is missing, is not a number, is negative or is bigger than
# PCD_ERR_CODE_MAX_SIZE is reported through Logger.Error.
#
# @param Value:         original ErrorCode value
# @param ContainerFile: Input value for filename of Dec file
# @param LineNum:       Line Num
#
# @return: hex() string of the error code; None if the value could not be
#          converted and Logger.Error returned to the caller
#
def ParsePcdErrorCode (Value = None, ContainerFile = None, LineNum = None):
    try:
        if Value.strip().startswith((TAB_HEX_START, TAB_CAPHEX_START)):
            Base = 16
        else:
            Base = 10
        ErrorCode = int(Value, Base)
    except (AttributeError, TypeError, ValueError):
        #
        # AttributeError/TypeError cover a Value that is not a string (for
        # instance the default None); it is a format problem like any other
        #
        Logger.Error('Parser',
                     FORMAT_NOT_SUPPORTED,
                     "The format %s of ErrorCode is not valid, should be UINT32 type or long type" % Value,
                     File = ContainerFile,
                     Line = LineNum)
        return None

    if ErrorCode > PCD_ERR_CODE_MAX_SIZE or ErrorCode < 0:
        Logger.Error('Parser',
                     FORMAT_NOT_SUPPORTED,
                     "The format %s of ErrorCode is not valid, should be UINT32 type or long type" % Value,
                     File = ContainerFile,
                     Line = LineNum)
    return hex(ErrorCode)
225
## ParseDecPcdGenericComment
#
# Parse the generic comment lines of a PCD: macros of the form $(NAME) are
# replaced, lines starting with the valid range / valid list / expression
# tags are turned into PcdErrorObject instances, the line starting with the
# prompt tag provides the prompt and any other non-empty line is help text.
#
# Valid range, valid list and expression are mutually exclusive for one PCD,
# and only one valid list and one prompt are accepted; violations are
# reported through Logger.Error.
#
# @param GenericComment:      Generic comment list, element of (CommentLine,
#                             LineNum)
# @param ContainerFile:       Input value for filename of Dec file
# @param TokenSpaceGuidCName: Token space GUID C name stored in every
#                             PcdErrorObject created
# @param CName:               PCD C name stored in every PcdErrorObject created
# @param MacroReplaceDict:    Macro name -> replacement text; macros that are
#                             not in the dictionary are left untouched
#
# @return: (HelpStr, PcdErrList, PromptStr)
#
def ParseDecPcdGenericComment (GenericComment, ContainerFile, TokenSpaceGuidCName, CName, MacroReplaceDict):
    HelpStr = ''
    PromptStr = ''
    PcdErr = None
    PcdErrList = []
    ValidValueNum = 0
    ValidRangeNum = 0
    ExpressionNum = 0

    #
    # $(MACRO) reference together with the white space in front of it.
    # Raw string: '\s', '\$' and '\(' are not valid string escapes.
    #
    MacroRe = re.compile(r'[\t\s]*\$\([A-Z][_A-Z0-9]*\)')

    #
    # NOTE(review): the statements following a Logger.Error call below look
    # like they rely on Logger.Error aborting the parsing (PcdErr would
    # otherwise be None or left over from a previous line) - confirm.
    #
    for (CommentLine, LineNum) in GenericComment:
        Comment = CleanString2(CommentLine)[1]
        #
        # To replace Macro
        #
        for MatchedStr in MacroRe.findall(Comment):
            if MatchedStr:
                Macro = MatchedStr.strip().lstrip('$(').rstrip(')').strip()
                if Macro in MacroReplaceDict:
                    Comment = Comment.replace(MatchedStr, MacroReplaceDict[Macro])
        if Comment.startswith(TAB_PCD_VALIDRANGE):
            if ValidValueNum > 0 or ExpressionNum > 0:
                Logger.Error('Parser',
                             FORMAT_NOT_SUPPORTED,
                             ST.WRN_MULTI_PCD_RANGES,
                             File = ContainerFile,
                             Line = LineNum)
            else:
                PcdErr = PcdErrorObject()
                PcdErr.SetTokenSpaceGuidCName(TokenSpaceGuidCName)
                PcdErr.SetCName(CName)
                PcdErr.SetFileLine(Comment)
                PcdErr.SetLineNum(LineNum)
                ValidRangeNum += 1
            ValidRange = Comment.replace(TAB_PCD_VALIDRANGE, "", 1).strip()
            Valid, Cause = _CheckRangeExpression(ValidRange)
            if Valid:
                #
                # "<ErrorCode> <split> <range>": the text in front of the
                # first split character is the error number
                #
                ValueList = ValidRange.split(TAB_VALUE_SPLIT)
                if len(ValueList) > 1:
                    PcdErr.SetValidValueRange((TAB_VALUE_SPLIT.join(ValueList[1:])).strip())
                    PcdErr.SetErrorNumber(ParsePcdErrorCode(ValueList[0], ContainerFile, LineNum))
                else:
                    PcdErr.SetValidValueRange(ValidRange)
                PcdErrList.append(PcdErr)
            else:
                Logger.Error("Parser",
                             FORMAT_NOT_SUPPORTED,
                             Cause,
                             ContainerFile,
                             LineNum)
        elif Comment.startswith(TAB_PCD_VALIDLIST):
            if ValidRangeNum > 0 or ExpressionNum > 0:
                Logger.Error('Parser',
                             FORMAT_NOT_SUPPORTED,
                             ST.WRN_MULTI_PCD_RANGES,
                             File = ContainerFile,
                             Line = LineNum)
            elif ValidValueNum > 0:
                Logger.Error('Parser',
                             FORMAT_NOT_SUPPORTED,
                             ST.WRN_MULTI_PCD_VALIDVALUE,
                             File = ContainerFile,
                             Line = LineNum)
            else:
                PcdErr = PcdErrorObject()
                PcdErr.SetTokenSpaceGuidCName(TokenSpaceGuidCName)
                PcdErr.SetCName(CName)
                PcdErr.SetFileLine(Comment)
                PcdErr.SetLineNum(LineNum)
                ValidValueNum += 1
            ValidValueExpr = Comment.replace(TAB_PCD_VALIDLIST, "", 1).strip()
            Valid, Cause = _CheckListExpression(ValidValueExpr)
            if Valid:
                #
                # the list is stored with TAB_SPACE_SPLIT in place of TAB_COMMA_SPLIT
                #
                ValidValue = Comment.replace(TAB_PCD_VALIDLIST, "", 1).replace(TAB_COMMA_SPLIT, TAB_SPACE_SPLIT)
                ValueList = ValidValue.split(TAB_VALUE_SPLIT)
                if len(ValueList) > 1:
                    PcdErr.SetValidValue((TAB_VALUE_SPLIT.join(ValueList[1:])).strip())
                    PcdErr.SetErrorNumber(ParsePcdErrorCode(ValueList[0], ContainerFile, LineNum))
                else:
                    PcdErr.SetValidValue(ValidValue)
                PcdErrList.append(PcdErr)
            else:
                Logger.Error("Parser",
                             FORMAT_NOT_SUPPORTED,
                             Cause,
                             ContainerFile,
                             LineNum)
        elif Comment.startswith(TAB_PCD_EXPRESSION):
            if ValidRangeNum > 0 or ValidValueNum > 0:
                Logger.Error('Parser',
                             FORMAT_NOT_SUPPORTED,
                             ST.WRN_MULTI_PCD_RANGES,
                             File = ContainerFile,
                             Line = LineNum)
            else:
                PcdErr = PcdErrorObject()
                PcdErr.SetTokenSpaceGuidCName(TokenSpaceGuidCName)
                PcdErr.SetCName(CName)
                PcdErr.SetFileLine(Comment)
                PcdErr.SetLineNum(LineNum)
                ExpressionNum += 1
            Expression = Comment.replace(TAB_PCD_EXPRESSION, "", 1).strip()
            Valid, Cause = _CheckExpression(Expression)
            if Valid:
                ValueList = Expression.split(TAB_VALUE_SPLIT)
                if len(ValueList) > 1:
                    PcdErr.SetExpression((TAB_VALUE_SPLIT.join(ValueList[1:])).strip())
                    PcdErr.SetErrorNumber(ParsePcdErrorCode(ValueList[0], ContainerFile, LineNum))
                else:
                    PcdErr.SetExpression(Expression)
                PcdErrList.append(PcdErr)
            else:
                Logger.Error("Parser",
                             FORMAT_NOT_SUPPORTED,
                             Cause,
                             ContainerFile,
                             LineNum)
        elif Comment.startswith(TAB_PCD_PROMPT):
            if PromptStr:
                Logger.Error('Parser',
                             FORMAT_NOT_SUPPORTED,
                             ST.WRN_MULTI_PCD_PROMPT,
                             File = ContainerFile,
                             Line = LineNum)
            PromptStr = Comment.replace(TAB_PCD_PROMPT, "", 1).strip()
        else:
            if Comment:
                HelpStr += Comment + '\n'

    #
    # remove the last EOL if the comment is of format 'FOO\n'
    #
    if HelpStr.endswith('\n'):
        if HelpStr != '\n' and not HelpStr.endswith('\n\n'):
            HelpStr = HelpStr[:-1]

    return HelpStr, PcdErrList, PromptStr
370
## ParseDecPcdTailComment
#
# Split the tail comment of a Pcd into the supported module type list located
# in front of the comment character and the help text located after it. The
# tail comment is returned unchanged (without its leading ' ' and '#') when its
# first word is not a supported module type.
#
# @param TailCommentList: Tail comment list of Pcd, item of format (Comment, LineNum);
#                         it must hold exactly one item
# @param ContainerFile:   Input value for filename of Dec file
# @retVal SupModuleList:  The supported module type list detected, None if
#                         the comment does not start with a module type
# @retVal HelpStr:        The generic help text string detected
#
def ParseDecPcdTailComment (TailCommentList, ContainerFile):
    assert(len(TailCommentList) == 1)
    TailComment, LineNum = TailCommentList[0][0], TailCommentList[0][1]

    Comment = TailComment.lstrip(" #")

    #
    # get first word and compare with SUP_MODULE_LIST
    #
    FirstWord = re.match(r"""^([^ #]*)""", Comment, re.DOTALL)
    if not FirstWord or FirstWord.group(1) not in SUP_MODULE_LIST:
        return None, Comment

    #
    # parse line, it must have supported module type specified;
    # append the comment character when it is missing so the split below
    # always provides two values
    #
    if TAB_COMMENT_SPLIT not in Comment:
        Comment += TAB_COMMENT_SPLIT
    SupMode, HelpStr = GetSplitValueList(Comment, TAB_COMMENT_SPLIT, 1)

    SupModuleList = []
    for Mod in GetSplitValueList(SupMode, TAB_SPACE_SPLIT):
        if not Mod:
            continue
        if Mod in SUP_MODULE_LIST:
            SupModuleList.append(Mod)
        else:
            Logger.Error("UPT",
                         FORMAT_INVALID,
                         ST.WRN_INVALID_MODULE_TYPE%Mod,
                         ContainerFile,
                         LineNum)

    return SupModuleList, HelpStr
414
## _CheckListExpression
#
# Validate a Pcd value list expression with IsValidListExpr. When the
# expression holds TAB_VALUE_SPLIT, only the text located after its first
# occurrence is validated.
#
# @param Expression: Pcd value list expression
#
# @return: the result of IsValidListExpr
#
def _CheckListExpression(Expression):
    SplitPos = Expression.find(TAB_VALUE_SPLIT)
    ListExpr = Expression if SplitPos < 0 else Expression[SplitPos + 1:]
    return IsValidListExpr(ListExpr)
427
## _CheckExpression
#
# Validate a Pcd value expression with IsValidLogicalExpr. When the
# expression holds TAB_VALUE_SPLIT, only the text located after its first
# occurrence is validated.
#
# @param Expression: Pcd value expression
#
# @return: the result of IsValidLogicalExpr
#
def _CheckExpression(Expression):
    SplitPos = Expression.find(TAB_VALUE_SPLIT)
    Expr = Expression if SplitPos < 0 else Expression[SplitPos + 1:]
    return IsValidLogicalExpr(Expr, True)
439
## _CheckRangeExpression
#
# Validate a Pcd range expression with IsValidRangeExpr. When the
# expression holds TAB_VALUE_SPLIT, only the text located after its first
# occurrence is validated.
#
# @param Expression: Pcd range expression
#
# @return: the result of IsValidRangeExpr
#
def _CheckRangeExpression(Expression):
    SplitPos = Expression.find(TAB_VALUE_SPLIT)
    RangeExpr = Expression if SplitPos < 0 else Expression[SplitPos + 1:]
    return IsValidRangeExpr(RangeExpr)
452
## ValidateCopyright
#
# Emit a warning through Logger.Warn when a copyright line failed validation.
#
# @param Result:   validation result, nothing is done when it is true
# @param ErrType:  value passed to Logger.Warn as its second argument
# @param FileName: name of the file holding the copyright line
# @param LineNo:   line number of the copyright line
# @param ErrMsg:   the detailed error description
#
def ValidateCopyright(Result, ErrType, FileName, LineNo, ErrMsg):
    if Result:
        return
    Logger.Warn("\nUPT", ErrType, FileName, LineNo, ErrMsg)
460
461 ## _ValidateCopyright
462 #
463 # @param Line: Line that contains copyright information, # stripped
464 #
465 # @retval Result: True if line is conformed to Spec format, False else
466 # @retval ErrMsg: the detailed error description
467 #
468 def _ValidateCopyright(Line):
469 if Line:
470 pass
471 Result = True
472 ErrMsg = ''
473
474 return Result, ErrMsg
475
## GenerateTokenList
#
# Tokenize Comment using '#' and ' ' as token separators. A run of separators
# counts as a single split point and separators at the beginning or at the
# end of Comment are ignored.
#
# @param Comment: the comment string to tokenize
#
# @return: the list of tokens; [''] when Comment holds separators only or is
#          empty
#
def GenerateTokenList (Comment):
    #
    # One pass does the work of collapsing the separators over and over
    # until the string stops changing
    #
    return re.split('[# ]+', Comment.strip('# '))
485
486
## ParseComment
#
# Extract the Usage, the Type, the variable name string and the help text
# from a comment. Only the first two tokens of the comment (the first one
# when a variable name was extracted) are searched for Usage and Type.
#
# Comment       - Sequence whose first element is the comment text to parse
# UsageTokens   - A dictionary of usage token synonyms
# TypeTokens    - A dictionary of type token synonyms
# RemoveTokens  - A list of tokens to remove from help text
# ParseVariable - True for parsing [Guids]. Otherwise False
#
# Returns (Usage, Type, String, HelpText): Usage and Type are 'UNDEFINED'
# when no token matched, String is None unless Type is 'Variable' and
# HelpText is None when nothing is left after the parsing.
#
def ParseComment (Comment, UsageTokens, TypeTokens, RemoveTokens, ParseVariable):
    #
    # Initialize return values
    #
    Usage = None
    Type = None
    String = None
    NumTokens = 2

    Comment = Comment[0]

    if ParseVariable:
        #
        # Remove white space around first instance of ':' from Comment if 'Variable'
        # is in front of ':' and Variable is the 1st or 2nd token in Comment.
        #
        Pieces = Comment.split(':', 1)
        if len(Pieces) > 1:
            Head = Pieces[0].strip()
            Tail = Pieces[1].strip()
            HeadTokens = GenerateTokenList (Head)
            if len(HeadTokens) in [1, 2] and HeadTokens[-1] == 'Variable' and Tail.startswith('L"'):
                Comment = Head + ':' + Tail

        #
        # Remove first instance of L"<VariableName> from Comment and put into String
        # if and only if L"<VariableName>" is the 1st token, the 2nd token. Or
        # L"<VariableName>" is the third token immediately following 'Variable:'.
        #
        Start = Comment.find('Variable:L"')
        if Start >= 0:
            # 9 == len('Variable:'), String then starts at the L" marker
            String = Comment[Start + 9:]
        else:
            Start = Comment.find('L"')
            if Start >= 0:
                String = Comment[Start:]

        # End is the offset of the closing quote counted after the opening L"
        End = -1
        if String is not None:
            End = String[2:].find('"')

        if End >= 0 and len(GenerateTokenList (Comment[:Start])) < 2:
            Comment = Comment[:Start] + String[End + 3:]
            String = String[:End + 3]
            Type = 'Variable'
            NumTokens = 1

    #
    # Initialize HelpText to Comment.
    # Content will be removed from HelpText as matching tokens are found
    #
    HelpText = Comment

    #
    # Tokenize Comment using '#' and ' ' as token separators and keep the
    # leading tokens that may hold Usage and Type
    #
    LeadingTokens = GenerateTokenList (Comment)[0:NumTokens]

    #
    # Search first two tokens for Usage and Type and remove any matching tokens
    # from HelpText
    #
    for Token in LeadingTokens:
        if Usage is None and Token in UsageTokens:
            Usage = UsageTokens[Token]
            HelpText = HelpText.replace(Token, '')

    if Usage is not None or not ParseVariable:
        for Token in LeadingTokens:
            if Type is None and Token in TypeTokens:
                Type = TypeTokens[Token]
                HelpText = HelpText.replace(Token, '')

    if Usage is not None:
        for Token in LeadingTokens:
            if Token in RemoveTokens:
                HelpText = HelpText.replace(Token, '')

    #
    # If no Usage token is present then set Usage to UNDEFINED
    #
    if Usage is None:
        Usage = 'UNDEFINED'

    #
    # If no Type token is present then set Type to UNDEFINED
    #
    if Type is None:
        Type = 'UNDEFINED'

    #
    # If Type is not 'Variable', then set String to None
    #
    if Type != 'Variable':
        String = None

    #
    # Strip ' ' and '#' from the beginning of HelpText
    # If HelpText is an empty string after all parsing is
    # complete then set HelpText to None
    #
    HelpText = HelpText.lstrip('# ') or None

    #
    # Return parsing results
    #
    return Usage, Type, String, HelpText