# Source: git.proxmox.com - mirror_edk2.git, blob BaseTools/Scripts/PatchCheck.py
# Revision subject: "BaseTools: Use pip module if available, CI uses it by default"
# Path: [mirror_edk2.git] / BaseTools / Scripts / PatchCheck.py
## @file
# Check a patch for various format issues
#
# Copyright (c) 2015 - 2020, Intel Corporation. All rights reserved.<BR>
# Copyright (C) 2020, Red Hat, Inc.<BR>
# Copyright (c) 2020, ARM Ltd. All rights reserved.<BR>
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
#

from __future__ import print_function

# Version string reported by the --version command line option.
VersionNumber = '0.1'
# Used as the argparse description of the command line tool.
__copyright__ = "Copyright (c) 2015 - 2016, Intel Corporation  All rights reserved."

import email
import argparse
import os
import re
import subprocess
import sys

import email.header
24
class Verbose:
    """Output verbosity shared by all checkers.

    ``level`` holds one of SILENT, ONELINE or NORMAL; the checkers compare
    against it before printing.
    """
    SILENT, ONELINE, NORMAL = range(3)
    level = NORMAL

class EmailAddressCheck:
    """Checks an email address."""

    def __init__(self, email, description):
        """Validate ``email`` and store the verdict in ``self.ok``.

        email       -- address in 'Name <user@host>' form, or None if missing
        description -- role of the address (e.g. 'Author'), used in messages
        """
        self.ok = True

        # error() prints self.description, so it must exist before the first
        # possible error() call below (it used to raise AttributeError).
        if description is None:
            self.description = "'unknown'"
        else:
            self.description = "'" + description + "'"

        if email is None:
            self.error('Email address is missing!')
            return
        if description is None:
            self.error('Email description is missing!')
            return

        self.check_email_address(email)

    def error(self, *err):
        """Mark the address as invalid and print ``err`` per the verbosity."""
        # The heading is printed only once, ahead of the first error.
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('The ' + self.description + ' email address is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # First line gets a bullet, continuation lines are indented.
            prefix = ' ' if index > 0 else ' *'
            print(prefix, line)

    # Groups: 1 = display name, 2 = white space before '<', 3 = the address.
    email_re1 = re.compile(r'(?:\s*)(.*?)(\s*)<(.+)>\s*$',
                           re.MULTILINE|re.IGNORECASE)

    def check_email_address(self, email):
        """Report every format problem found in 'Name <address>'."""
        email = email.strip()
        mo = self.email_re1.match(email)
        if mo is None:
            self.error("Email format is invalid: " + email.strip())
            return

        name = mo.group(1).strip()
        if name == '':
            self.error("Name is not provided with email address: " +
                       email)
        else:
            quoted = len(name) > 2 and name[0] == '"' and name[-1] == '"'
            if name.find(',') >= 0 and not quoted:
                self.error('Add quotes (") around name with a comma: ' +
                           name)

        if mo.group(2) == '':
            self.error("There should be a space between the name and " +
                       "email address: " + email)

        if mo.group(3).find(' ') >= 0:
            self.error("The email address cannot contain a space: " +
                       mo.group(3))

        # A sender name/address pair rewritten by the groups.io list server.
        if ' via Groups.Io' in name and mo.group(3).endswith('@groups.io'):
            self.error("Email rewritten by lists DMARC / DKIM / SPF: " +
                       email)
88
class CommitMessageCheck:
    """Checks the contents of a git commit message."""

    def __init__(self, subject, message):
        """Run all commit message checks; the verdict is left in ``self.ok``.

        subject -- subject line of the commit, or None if missing
        message -- body of the commit message, or None if missing
        """
        self.ok = True

        # Every later check needs both pieces (a lone None would raise
        # TypeError), so report the problem once and stop.
        if subject is None or message is None:
            self.error('Commit message is missing!')
            return

        self.subject = subject
        self.msg = message

        # Echo the subject only at normal verbosity so that --oneline and
        # --silent produce the output their help text promises.
        if Verbose.level > Verbose.ONELINE:
            print(subject)

        self.check_contributed_under()
        self.check_signed_off_by()
        self.check_misc_signatures()
        self.check_overall_format()
        self.report_message_result()

    url = 'https://github.com/tianocore/tianocore.github.io/wiki/Commit-Message-Format'

    def report_message_result(self):
        """Print the final verdict (normal verbosity only)."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            # All checks passed
            print('The commit message format passed all checks.')
        else:
            print(self.url)

    def error(self, *err):
        """Mark the message as invalid and print ``err`` per the verbosity."""
        # The heading is printed only once, ahead of the first error.
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('The commit message format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # First line gets a bullet, continuation lines are indented.
            prefix = ' ' if index > 0 else ' *'
            print(prefix, line)

    # Find 'contributed-under:' at the start of a line ignoring case and
    # requires ':' to be present.  Matches if there is white space before
    # the tag or between the tag and the ':'.
    contributed_under_re = \
        re.compile(r'^\s*contributed-under\s*:', re.MULTILINE|re.IGNORECASE)

    def check_contributed_under(self):
        """Flag any 'Contributed-under:' tag found in the message."""
        match = self.contributed_under_re.search(self.msg)
        if match is not None:
            self.error('Contributed-under! (Note: this must be ' +
                       'removed by the code contributor!)')

    @staticmethod
    def make_signature_re(sig, re_input=False):
        """Return a compiled regex matching '<sig>: <value>' lines.

        sig      -- tag name, or a ready-made regex when re_input is True
        re_input -- when False, each '-' in sig also matches white space

        The regex has four groups: tag, white space before ':', white space
        after ':', and value.
        """
        if re_input:
            sub_re = sig
        else:
            sub_re = sig.replace('-', r'[-\s]+')
        re_str = (r'^(?P<tag>' + sub_re +
                  r')(\s*):(\s*)(?P<value>\S.*?)(?:\s*)$')
        try:
            return re.compile(re_str, re.MULTILINE|re.IGNORECASE)
        except Exception:
            print("Tried to compile re:", re_str)
            raise

    # Matches one line of the trailing signature block: either 'Tag: value'
    # or a bracketed '[updater: note]' remark.
    sig_block_re = \
        re.compile(r'''^
                        (?: (?P<tag>[^:]+) \s* : \s*
                            (?P<value>\S.*?) )
                            |
                        (?: \[ (?P<updater>[^:]+) \s* : \s*
                               (?P<note>.+?) \s* \] )
                    \s* $''',
                   re.VERBOSE | re.MULTILINE)

    def find_signatures(self, sig):
        """Find and validate all '<sig>-by:' (or 'Cc:') lines.

        Returns the list of (tag, space-before-colon, space-after-colon,
        value) tuples that were found.
        """
        if not sig.endswith('-by') and sig != 'Cc':
            sig += '-by'
        regex = self.make_signature_re(sig)

        sigs = regex.findall(self.msg)

        # The regex ignores case and accepts white space for '-', so compare
        # against the canonical spelling.  Spelling errors are reported
        # before the per-signature spacing errors.
        for s in sigs:
            if s[0] != sig:
                self.error("'" +s[0] + "' should be '" + sig + "'")

        for s in sigs:
            if s[1] != '':
                self.error('There should be no spaces between ' + sig +
                           " and the ':'")
            if s[2] != ' ':
                self.error("There should be a space after '" + sig + ":'")

            EmailAddressCheck(s[3], sig)

        return sigs

    def check_signed_off_by(self):
        """Require at least one well-formed Signed-off-by line."""
        sob='Signed-off-by'
        if self.msg.find(sob) < 0:
            self.error('Missing Signed-off-by! (Note: this must be ' +
                       'added by the code contributor!)')
            return

        sobs = self.find_signatures('Signed-off')

        if len(sobs) == 0:
            self.error('Invalid Signed-off-by format!')
            return

    sig_types = (
        'Reviewed',
        'Reported',
        'Tested',
        'Suggested',
        'Acked',
        'Cc'
        )

    def check_misc_signatures(self):
        """Validate the format of every optional signature tag."""
        for sig in self.sig_types:
            self.find_signatures(sig)

    cve_re = re.compile('CVE-[0-9]{4}-[0-9]{5}[^0-9]')

    # Body lines starting with one of these are never reported as too long.
    # 'Reviewed-by' is deliberately listed without the ':'.
    long_line_exempt_prefixes = (
        'git-svn-id:',
        'Reviewed-by',
        'Acked-by:',
        'Tested-by:',
        'Reported-by:',
        'Suggested-by:',
        'Signed-off-by:',
        'Cc:',
        )

    def check_overall_format(self):
        """Check line lengths, the blank second line and the signature block."""
        # splitlines() removes the line terminators, so '\n' is always the
        # terminator to use for the two synthesized leading lines (subject
        # and the empty line that follows it).
        lines = self.msg.splitlines()
        lines.insert(0, '\n')
        lines.insert(0, self.subject + '\n')

        count = len(lines)
        subject_line = lines[0]
        subject_len = len(subject_line.rstrip())

        #
        # If CVE-xxxx-xxxxx is present in subject line, then limit length of
        # subject line to 92 characters, otherwise limit it to 75 characters
        #
        limit = 93 if self.cve_re.search(subject_line) else 76
        if subject_len >= limit:
            self.error(
                'First line of commit message (subject line) is too long (%d >= %d).' %
                (subject_len, limit)
                )

        if len(subject_line.strip()) == 0:
            self.error('First line of commit message (subject line) ' +
                       'is empty.')

        if lines[1].strip() != '':
            self.error('Second line of commit message should be ' +
                       'empty.')

        for i in range(2, count):
            if (len(lines[i]) >= 76 and
                    len(lines[i].split()) > 1 and
                    not lines[i].startswith(self.long_line_exempt_prefixes)):
                #
                # Print a warning if body line is longer than 75 characters.
                # Warnings are informational, so stay quiet for --oneline
                # and --silent.
                #
                if Verbose.level >= Verbose.NORMAL:
                    print(
                        'WARNING - Line %d of commit message is too long (%d >= 76).' %
                        (i + 1, len(lines[i]))
                        )
                    print(lines[i])

        # Walk backwards over the signature block; it must be preceded by an
        # empty line.  Line 0 (the subject) is never examined.
        last_sig_line = None
        for i in range(count - 1, 0, -1):
            line = lines[i]
            mo = self.sig_block_re.match(line)
            if mo is None:
                if line.strip() == '':
                    break
                elif last_sig_line is not None:
                    err2 = 'Add empty line before "%s"?' % last_sig_line
                    self.error('The line before the signature block ' +
                               'should be empty', err2)
                else:
                    self.error('The signature block was not found')
                break
            last_sig_line = line.strip()
303
# States of the GitDiffCheck line parser: before a 'diff --git' line, between
# the diff line and the first hunk, and inside the patch content.
(START, PRE_PATCH, PATCH) = range(3)
305
class GitDiffCheck:
    """Checks the contents of a git diff."""

    def __init__(self, diff):
        """Parse ``diff`` line by line; the verdict is left in ``self.ok``.

        ``self.format_ok`` becomes False (and parsing stops) when the text
        does not look like a git diff at all.
        """
        self.ok = True
        self.format_ok = True
        self.lines = diff.splitlines(True)
        self.count = len(self.lines)
        self.line_num = 0
        self.state = START
        self.new_bin = []
        # Per-file state; reassigned by run() for every 'diff --git' line.
        # Defined here so that no method can meet a missing attribute.
        self.filename = None
        self.binary = False
        self.is_newfile = False
        self.force_crlf = True
        self.force_notabs = True
        while self.line_num < self.count and self.format_ok:
            line_num = self.line_num
            self.run()
            # run() must always consume at least one line
            assert(self.line_num > line_num)
        self.report_message_result()

    def report_message_result(self):
        """Print the final verdict (normal verbosity only)."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The code passed all checks.')
        if self.new_bin:
            print('\nWARNING - The following binary files will be added ' +
                  'into the repository:')
            for binary in self.new_bin:
                print(' ' + binary)

    def run(self):
        """Process the line at ``self.line_num`` and advance the parser."""
        line = self.lines[self.line_num]

        # A new 'diff --git' line always starts the next file.
        if self.state in (PRE_PATCH, PATCH):
            if line.startswith('diff --git'):
                self.state = START
        if self.state == PATCH:
            if line.startswith('@@ '):
                # Next hunk of the same file
                self.state = PRE_PATCH
            elif len(line) >= 1 and line[0] not in ' -+' and \
                 not line.startswith('\r\n') and \
                 not line.startswith(r'\ No newline ') and not self.binary:
                # The line cannot belong to a hunk: the patch has ended.
                # Anything that follows must not contain another diff.
                for later in self.lines[self.line_num + 1:]:
                    if later.startswith('diff --git'):
                        self.format_error('diff found after end of patch')
                        break
                self.line_num = self.count
                return

        if self.state == START:
            if line.startswith('diff --git'):
                self.state = PRE_PATCH
                # Skip 'diff --git a/' (13 characters) and keep the path up
                # to the next space.
                self.filename = line[13:].split(' ', 1)[0]
                self.is_newfile = False
                self.force_crlf = True
                self.force_notabs = True
                if self.filename.endswith('.sh') or \
                   self.filename.startswith('BaseTools/BinWrappers/PosixLike/') or \
                   self.filename.startswith('BaseTools/BinPipWrappers/PosixLike/') or \
                   self.filename.startswith('BaseTools/Bin/CYGWIN_NT-5.1-i686/') or \
                   self.filename == 'BaseTools/BuildEnv':
                    #
                    # Do not enforce CR/LF line endings for linux shell scripts.
                    # Some linux shell scripts don't end with the ".sh" extension,
                    # they are identified by their path.
                    #
                    self.force_crlf = False
                if self.filename == '.gitmodules' or \
                   self.filename == 'BaseTools/Conf/diff.order':
                    #
                    # .gitmodules and diff orderfiles are used internally by git
                    # use tabs and LF line endings.  Do not enforce no tabs and
                    # do not enforce CR/LF line endings.
                    #
                    self.force_crlf = False
                    self.force_notabs = False
            elif len(line.rstrip()) != 0:
                self.format_error("didn't find diff command")
            self.line_num += 1
        elif self.state == PRE_PATCH:
            if line.startswith('@@ '):
                self.state = PATCH
                self.binary = False
            elif line.startswith('GIT binary patch') or \
                 line.startswith('Binary files'):
                self.state = PATCH
                self.binary = True
                if self.is_newfile:
                    self.new_bin.append(self.filename)
            elif line.startswith('new file mode 160000'):
                #
                # New submodule.  Do not enforce CR/LF line endings
                #
                self.force_crlf = False
            else:
                ok = False
                # Overwritten by every header line, so only the line directly
                # before the binary marker decides is_newfile.
                self.is_newfile = self.newfile_prefix_re.match(line)
                for pfx in self.pre_patch_prefixes:
                    if line.startswith(pfx):
                        ok = True
                if not ok:
                    self.format_error("didn't find diff hunk marker (@@)")
            self.line_num += 1
        elif self.state == PATCH:
            if self.binary:
                pass
            elif line.startswith('-'):
                pass
            elif line.startswith('+'):
                self.check_added_line(line[1:])
            elif line.startswith('\r\n'):
                pass
            elif line.startswith(r'\ No newline '):
                pass
            elif not line.startswith(' '):
                self.format_error("unexpected patch line")
            self.line_num += 1

    # Header lines that may appear between 'diff --git' and the first hunk.
    pre_patch_prefixes = (
        '--- ',
        '+++ ',
        'index ',
        'new file ',
        'deleted file ',
        'old mode ',
        'new mode ',
        'similarity index ',
        'copy from ',
        'copy to ',
        'rename ',
        )

    line_endings = ('\r\n', '\n\r', '\n', '\r')

    # An 'index' line whose old blob id is all zeros, i.e. a new file.
    newfile_prefix_re = \
        re.compile(r'''^
                       index\ 0+\.\.
                   ''',
                   re.VERBOSE)

    def added_line_error(self, msg, line):
        """Report ``msg`` together with the file name and offending line."""
        lines = [ msg ]
        if self.filename is not None:
            lines.append('File: ' + self.filename)
        lines.append('Line: ' + line)

        self.error(*lines)

    # DEBUG ((EFI_D_xxx ... using the old EFI_D_ error level names.
    old_debug_re = \
        re.compile(r'''
                        DEBUG \s* \( \s* \( \s*
                        (?: DEBUG_[A-Z_]+ \s* \| \s*)*
                        EFI_D_ ([A-Z_]+)
                   ''',
                   re.VERBOSE)

    def check_added_line(self, line):
        """Check one added line (without the leading '+') for style issues."""
        eol = ''
        # No break on purpose: every ending in the tuple is tried in order
        # and stripped when present.
        for an_eol in self.line_endings:
            if line.endswith(an_eol):
                eol = an_eol
                line = line[:-len(eol)]

        stripped = line.rstrip()

        if self.force_crlf and eol != '\r\n' and (line.find('Subproject commit') == -1):
            self.added_line_error('Line ending (%s) is not CRLF' % repr(eol),
                                  line)
        if self.force_notabs and '\t' in line:
            self.added_line_error('Tab character used', line)
        if len(stripped) < len(line):
            self.added_line_error('Trailing whitespace found', line)

        mo = self.old_debug_re.search(line)
        if mo is not None:
            self.added_line_error('EFI_D_' + mo.group(1) + ' was used, '
                                  'but DEBUG_' + mo.group(1) +
                                  ' is now recommended', line)

    split_diff_re = re.compile(r'''
                                   (?P<cmd>
                                       ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                                   )
                                   (?P<index>
                                       ^ index \s+ .+ $
                                   )
                               ''',
                               re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    def format_error(self, err):
        """Report a structural problem; this also stops the parse loop."""
        self.format_ok = False
        err = 'Patch format error: ' + err
        err2 = 'Line: ' + self.lines[self.line_num].rstrip()
        self.error(err, err2)

    def error(self, *err):
        """Mark the diff as invalid and print ``err`` per the verbosity."""
        # The heading is printed only once, ahead of the first error.
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('Code format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # First line gets a bullet, continuation lines are indented.
            prefix = ' ' if index > 0 else ' *'
            print(prefix, line)
510
class CheckOnePatch:
    """Checks the contents of a git email formatted patch.

    Various checks are performed on both the commit message and the
    patch content.
    """

    def __init__(self, name, patch):
        """Check ``patch`` (text); ``name`` labels the --oneline result."""
        self.patch = patch
        self.find_patch_pieces()

        email_check = EmailAddressCheck(self.author_email, 'Author')
        email_ok = email_check.ok

        msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg)
        msg_ok = msg_check.ok

        diff_ok = True
        if self.diff is not None:
            diff_check = GitDiffCheck(self.diff)
            diff_ok = diff_check.ok

        self.ok = email_ok and msg_ok and diff_ok

        if Verbose.level == Verbose.ONELINE:
            if self.ok:
                result = 'ok'
            else:
                result = list()
                # Name the email failure too; otherwise a patch that fails
                # only the email check is summarized as a bare 'bad '.
                if not email_ok:
                    result.append('author email')
                if not msg_ok:
                    result.append('commit message')
                if not diff_ok:
                    result.append('diff content')
                result = 'bad ' + ' and '.join(result)
            print(name, result)


    git_diff_re = re.compile(r'''
                                 ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                             ''',
                             re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Splits the text before the diff into the commit message and the
    # '---' separated diffstat.
    stat_re = \
        re.compile(r'''
                       (?P<commit_message> [\s\S\r\n]* )
                       (?P<stat>
                           ^ --- $ [\r\n]+
                           (?: ^ \s+ .+ \s+ \| \s+ \d+ \s+ \+* \-*
                               $ [\r\n]+ )+
                           [\s\S\r\n]+
                       )
                   ''',
                   re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    subject_prefix_re = \
        re.compile(r'''^
                       \s* (\[
                        [^\[\]]* # Allow all non-brackets
                       \])* \s*
                   ''',
                   re.VERBOSE)

    def find_patch_pieces(self):
        """Split ``self.patch`` into its pieces.

        Sets commit_msg, stat, commit_subject, commit_prefix, diff and
        author_email; a piece that is not present is left as None.
        """
        if sys.version_info < (3, 0):
            patch = self.patch.encode('ascii', 'ignore')
        else:
            patch = self.patch

        self.commit_msg = None
        self.stat = None
        self.commit_subject = None
        self.commit_prefix = None
        self.diff = None
        # Must exist on the early return below as well: __init__ reads it.
        self.author_email = None

        # A bare diff has no email headers and no commit message.
        if patch.startswith('diff --git'):
            self.diff = patch
            return

        pmail = email.message_from_string(patch)
        parts = list(pmail.walk())
        assert(len(parts) == 1)
        assert(parts[0].get_content_type() == 'text/plain')
        content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore')

        mo = self.git_diff_re.search(content)
        if mo is not None:
            self.diff = content[mo.start():]
            content = content[:mo.start()]

        mo = self.stat_re.search(content)
        if mo is None:
            self.commit_msg = content
        else:
            self.stat = mo.group('stat')
            self.commit_msg = mo.group('commit_message')
        #
        # Parse subject line from email header.  The subject line may be
        # composed of multiple parts with different encodings.  Decode and
        # combine all the parts to produce a single string with the contents of
        # the decoded subject line.  A missing Subject header is treated as an
        # empty subject so that it is reported instead of raising.
        #
        parts = email.header.decode_header(pmail.get('subject') or '')
        subject = ''
        for (part, encoding) in parts:
            if encoding:
                part = part.decode(encoding)
            elif isinstance(part, bytes):
                # Parts without a charset can still arrive as bytes
                part = part.decode('utf-8', 'replace')
            subject = subject + part

        self.commit_subject = subject.replace('\r\n', '')
        self.commit_subject = self.commit_subject.replace('\n', '')
        # Drop leading '[PATCH ...]' style prefixes
        self.commit_subject = self.subject_prefix_re.sub('', self.commit_subject, 1)

        self.author_email = pmail['from']
629
class CheckGitCommits:
    """Reads patches from git based on the specified git revision range.

    The patches are read from git, and then checked.
    """

    def __init__(self, rev_spec, max_count):
        """Check every commit selected by ``rev_spec``.

        rev_spec  -- revision or range handed to 'git rev-list'
        max_count -- upper bound on the number of commits, or None
        """
        commits = self.read_commit_list_from_git(rev_spec, max_count)
        # For a single commit show the name the user typed instead of the
        # abbreviated hash.
        if len(commits) == 1 and Verbose.level > Verbose.ONELINE:
            commits = [ rev_spec ]
        self.ok = True
        blank_line = False
        for commit in commits:
            if Verbose.level > Verbose.ONELINE:
                # Separate the reports of consecutive commits
                if blank_line:
                    print()
                else:
                    blank_line = True
                print('Checking git commit:', commit)
            # Not named 'email': that would shadow the imported module.
            committer = self.read_committer_email_address_from_git(commit)
            self.ok &= EmailAddressCheck(committer, 'Committer').ok
            patch = self.read_patch_from_git(commit)
            self.ok &= CheckOnePatch(commit, patch).ok
        if not commits:
            print("Couldn't find commit matching: '{}'".format(rev_spec))

    def read_commit_list_from_git(self, rev_spec, max_count):
        """Return the list of abbreviated commit ids for ``rev_spec``."""
        # Run git to get the commit patch
        cmd = [ 'rev-list', '--abbrev-commit', '--no-walk' ]
        if max_count is not None:
            cmd.append('--max-count=' + str(max_count))
        cmd.append(rev_spec)
        out = self.run_git(*cmd)
        return out.split() if out else []

    def read_patch_from_git(self, commit):
        """Return ``commit`` formatted as an email patch (None on failure)."""
        # Run git to get the commit patch
        return self.run_git('show', '--pretty=email', '--no-textconv',
                            '--no-use-mailmap', commit)

    def read_committer_email_address_from_git(self, commit):
        """Return 'Name <address>' of the committer (None on failure)."""
        # Run git to get the committer email
        return self.run_git('show', '--pretty=%cn <%ce>', '--no-patch',
                            '--no-use-mailmap', commit)

    def run_git(self, *args):
        """Run 'git <args>' and return its decoded output.

        stderr is merged into stdout.  None is returned when git printed
        nothing or when the output starts with 'fatal'.
        """
        cmd = [ 'git' ]
        cmd += args
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0]
        if output and output.find(b"fatal") != 0:
            return output.decode('utf-8', 'ignore')
        return None
683
class CheckOnePatchFile:
    """Performs a patch check for a single file.

    stdin is used when the filename is '-'.
    """

    def __init__(self, patch_filename):
        """Read the patch and check it; the verdict is left in ``self.ok``."""
        if patch_filename == '-':
            patch = sys.stdin.read()
            patch_filename = 'stdin'
        else:
            # 'with' closes the file even when reading fails.  Undecodable
            # bytes are dropped rather than aborting the check.
            with open(patch_filename, 'rb') as f:
                patch = f.read().decode('utf-8', 'ignore')
        if Verbose.level > Verbose.ONELINE:
            print('Checking patch file:', patch_filename)
        self.ok = CheckOnePatch(patch_filename, patch).ok
701
class CheckOneArg:
    """Performs a patch check for a single command line argument.

    The argument will be handed off to a file or git-commit based
    checker.
    """

    def __init__(self, param, max_count=None):
        """Check ``param``; the verdict is left in ``self.ok``.

        param     -- '-' (stdin), an existing path, or a git revision spec
        max_count -- commit limit, only used for a git revision spec
        """
        self.ok = True
        # Anything that is not stdin or an existing path is taken to be a
        # git revision.
        if param == '-' or os.path.exists(param):
            checker = CheckOnePatchFile(param)
        else:
            checker = CheckGitCommits(param, max_count)
        self.ok = checker.ok
716
class PatchCheckApp:
    """Checks patches based on the command line arguments."""

    def __init__(self):
        """Parse the command line, run all checks and set ``self.retval``."""
        self.parse_options()
        patches = self.args.patches

        # With no arguments, check the current commit
        if len(patches) == 0:
            patches = [ 'HEAD' ]

        self.ok = True
        self.count = None
        for patch in patches:
            self.process_one_arg(patch)

        # A trailing '-<count>' with nothing after it applies to HEAD
        if self.count is not None:
            self.process_one_arg('HEAD')

        if self.ok:
            self.retval = 0
        else:
            self.retval = -1

    def process_one_arg(self, arg):
        """Handle one positional argument.

        '-<number>' only records a commit count for the next argument;
        anything else is checked, using and then clearing that count.
        """
        if len(arg) >= 2 and arg[0] == '-':
            try:
                self.count = int(arg[1:])
                return
            except ValueError:
                # Not a number: fall through and treat it as a patch/revision
                pass
        self.ok &= CheckOneArg(arg, self.count).ok
        self.count = None

    def parse_options(self):
        """Parse sys.argv into ``self.args`` and set the verbosity level."""
        parser = argparse.ArgumentParser(description=__copyright__)
        parser.add_argument('--version', action='version',
                            version='%(prog)s ' + VersionNumber)
        parser.add_argument('patches', nargs='*',
                            help='[patch file | git rev list]')
        group = parser.add_mutually_exclusive_group()
        group.add_argument("--oneline",
                           action="store_true",
                           help="Print one result per line")
        group.add_argument("--silent",
                           action="store_true",
                           help="Print nothing")
        self.args = parser.parse_args()
        if self.args.oneline:
            Verbose.level = Verbose.ONELINE
        if self.args.silent:
            Verbose.level = Verbose.SILENT
768
if __name__ == "__main__":
    # The exit status is PatchCheckApp.retval: 0 when every check passed,
    # -1 otherwise.
    sys.exit(PatchCheckApp().retval)