]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Scripts/PatchCheck.py
BaseTools/Scripts/PatchCheck.py: Do not use mailmap
[mirror_edk2.git] / BaseTools / Scripts / PatchCheck.py
1 ## @file
2 # Check a patch for various format issues
3 #
4 # Copyright (c) 2015 - 2020, Intel Corporation. All rights reserved.<BR>
5 # Copyright (C) 2020, Red Hat, Inc.<BR>
6 #
7 # SPDX-License-Identifier: BSD-2-Clause-Patent
8 #
9
10 from __future__ import print_function
11
# Version string reported by the --version command line option.
VersionNumber = "0.1"

# Copyright banner; also used as the argparse description text.
__copyright__ = 'Copyright (c) 2015 - 2016, Intel Corporation All rights reserved.'
14
15 import email
16 import argparse
17 import os
18 import re
19 import subprocess
20 import sys
21
class Verbose:
    """Output verbosity levels consulted by the checkers in this script."""

    # Ordered from quietest to most talkative so that levels can be compared
    # with < and >.
    SILENT = 0
    ONELINE = 1
    NORMAL = 2

    # Currently selected verbosity.
    level = NORMAL
25
class EmailAddressCheck:
    """Checks an email address.

    The check runs from the constructor.  Afterwards ``ok`` is True when no
    problem was found and False otherwise.  Problems are printed according
    to ``Verbose.level``.
    """

    # Splits 'Name <address>' into: name, the whitespace between the name and
    # the '<', and the address itself.
    email_re1 = re.compile(r'(?:\s*)(.*?)(\s*)<(.+)>\s*$',
                           re.MULTILINE|re.IGNORECASE)

    def __init__(self, email, description):
        """Validate `email`.

        email:       text of the form 'Name <address>', or None if missing.
        description: label used in the diagnostics (e.g. 'Author'), or None
                     if missing.
        """
        self.ok = True

        # error() prints self.description, so it has to exist before the
        # first possible error() call below.
        if description is None:
            self.description = '(unknown)'
        else:
            self.description = "'" + description + "'"

        if email is None:
            self.error('Email address is missing!')
            return
        if description is None:
            self.error('Email description is missing!')
            return

        self.check_email_address(email)

    def error(self, *err):
        """Mark the check as failed and print the given message lines."""
        if self.ok and Verbose.level > Verbose.ONELINE:
            # Heading is printed only once, in front of the first error.
            print('The ' + self.description + ' email address is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # First line gets a bullet, continuation lines do not.
            prefix = ' ' if index else ' *'
            print(prefix, line)

    def check_email_address(self, email):
        """Report every formatting problem found in `email`."""
        email = email.strip()
        mo = self.email_re1.match(email)
        if mo is None:
            self.error("Email format is invalid: " + email.strip())
            return

        name = mo.group(1).strip()
        separator = mo.group(2)
        address = mo.group(3)

        if name == '':
            self.error("Name is not provided with email address: " +
                       email)
        else:
            quoted = len(name) > 2 and name[0] == '"' and name[-1] == '"'
            if name.find(',') >= 0 and not quoted:
                self.error('Add quotes (") around name with a comma: ' +
                           name)

        if separator == '':
            self.error("There should be a space between the name and " +
                       "email address: " + email)

        if address.find(' ') >= 0:
            self.error("The email address cannot contain a space: " +
                       address)

        # A groups.io sender combined with a ' via Groups.Io' name is
        # reported as an address rewritten by the mailing list.
        if ' via Groups.Io' in name and address.endswith('@groups.io'):
            self.error("Email rewritten by lists DMARC / DKIM / SPF: " +
                       email)
85
class CommitMessageCheck:
    """Checks the contents of a git commit message.

    All checks run from the constructor.  Afterwards ``ok`` is True when no
    error was reported.  Over-long body lines only produce a warning and do
    not affect ``ok``.
    """

    # Printed after a failed check so the contributor can look up the rules.
    url = 'https://github.com/tianocore/tianocore.github.io/wiki/Commit-Message-Format'

    # Find 'contributed-under:' at the start of a line ignoring case and
    # requires ':' to be present.  Matches if there is white space before
    # the tag or between the tag and the ':'.
    contributed_under_re = \
        re.compile(r'^\s*contributed-under\s*:', re.MULTILINE|re.IGNORECASE)

    # Matches either a 'Tag: value' line or a '[updater: note]' line.
    sig_block_re = \
        re.compile(r'''^
                        (?: (?P<tag>[^:]+) \s* : \s*
                            (?P<value>\S.*?) )
                            |
                        (?: \[ (?P<updater>[^:]+) \s* : \s*
                               (?P<note>.+?) \s* \] )
                    \s* $''',
                   re.VERBOSE | re.MULTILINE)

    # Signature tags checked in addition to Signed-off-by.  find_signatures()
    # appends '-by' to every entry except 'Cc'.
    sig_types = (
        'Reviewed',
        'Reported',
        'Tested',
        'Suggested',
        'Acked',
        'Cc'
        )

    # Note the trailing [^0-9]: a CVE number is only recognised when it is
    # followed by a non-digit character.
    cve_re = re.compile('CVE-[0-9]{4}-[0-9]{5}[^0-9]')

    def __init__(self, subject, message):
        """Run every commit message check.

        subject: subject line text, or None if unavailable.
        message: commit message body text, or None if unavailable.
        """
        self.ok = True

        # The checks below need both pieces; a single missing piece would
        # otherwise surface later as a TypeError/AttributeError.
        if subject is None or message is None:
            self.error('Commit message is missing!')
            return

        self.subject = subject
        self.msg = message

        # Honour --oneline / --silent: only echo the subject in normal mode.
        if Verbose.level >= Verbose.NORMAL:
            print(subject)

        self.check_contributed_under()
        self.check_signed_off_by()
        self.check_misc_signatures()
        self.check_overall_format()
        self.report_message_result()

    def report_message_result(self):
        """Print the overall verdict (normal verbosity only)."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            # All checks passed
            print('The commit message format passed all checks.')
        else:
            print(self.url)

    def error(self, *err):
        """Mark the check as failed and print the given message lines."""
        if self.ok and Verbose.level > Verbose.ONELINE:
            # Heading is printed only once, in front of the first error.
            print('The commit message format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # First line gets a bullet, continuation lines do not.
            prefix = ' ' if index else ' *'
            print(prefix, line)

    def check_contributed_under(self):
        """Report an error if a Contributed-under tag is present."""
        match = self.contributed_under_re.search(self.msg)
        if match is not None:
            self.error('Contributed-under! (Note: this must be ' +
                       'removed by the code contributor!)')

    @staticmethod
    def make_signature_re(sig, re_input=False):
        """Build the regular expression that finds `sig` tag lines.

        sig:      tag text such as 'Signed-off-by'.  Unless `re_input` is
                  set, every '-' is widened to also accept white space so
                  that misspelt tags are still found (and then reported).
        re_input: when True, `sig` is used as a regular expression as-is.

        The pattern's groups are, in order: tag, white space before ':',
        white space after ':', value.
        """
        if re_input:
            sub_re = sig
        else:
            sub_re = sig.replace('-', r'[-\s]+')
        re_str = (r'^(?P<tag>' + sub_re +
                  r')(\s*):(\s*)(?P<value>\S.*?)(?:\s*)$')
        try:
            return re.compile(re_str, re.MULTILINE|re.IGNORECASE)
        except Exception:
            print("Tried to compile re:", re_str)
            raise

    def find_signatures(self, sig):
        """Check every `sig` tag line in the message and return the matches.

        sig: tag name; '-by' is appended unless it already ends with '-by'
             or is 'Cc'.

        Returns the list of (tag, space-before-colon, space-after-colon,
        value) tuples found.  Each value is also validated as an email
        address; those problems are printed by EmailAddressCheck, but its
        result is not folded into ``self.ok``.
        """
        if not sig.endswith('-by') and sig != 'Cc':
            sig += '-by'
        regex = self.make_signature_re(sig)

        sigs = regex.findall(self.msg)

        # The pattern ignores case and accepts white space for '-', so any
        # tag that is not spelt exactly like `sig` is a formatting error.
        for s in sigs:
            if s[0] != sig:
                self.error("'" +s[0] + "' should be '" + sig + "'")

        for s in sigs:
            if s[1] != '':
                self.error('There should be no spaces between ' + sig +
                           " and the ':'")
            if s[2] != ' ':
                self.error("There should be a space after '" + sig + ":'")

            EmailAddressCheck(s[3], sig)

        return sigs

    def check_signed_off_by(self):
        """Require at least one well-formed Signed-off-by line."""
        sob='Signed-off-by'
        if self.msg.find(sob) < 0:
            self.error('Missing Signed-off-by! (Note: this must be ' +
                       'added by the code contributor!)')
            return

        sobs = self.find_signatures('Signed-off')

        if len(sobs) == 0:
            self.error('Invalid Signed-off-by format!')
            return

    def check_misc_signatures(self):
        """Check the format of all the optional signature tags."""
        for sig in self.sig_types:
            self.find_signatures(sig)

    def check_overall_format(self):
        """Check line lengths and the placement of the signature block."""
        # Rebuild the message as: subject, empty line, body lines.  The
        # newline appended to the subject matters: cve_re needs a non-digit
        # character after the CVE number, so a subject that ends with the
        # number is only recognised because of it.
        lines = [self.subject + '\n', '\n'] + self.msg.splitlines()
        subject_line = lines[0]
        subject_len = len(subject_line.rstrip())

        #
        # If CVE-xxxx-xxxxx is present in subject line, then limit length of
        # subject line to 92 characters, otherwise to 75 characters.
        #
        if self.cve_re.search(subject_line):
            limit = 93
        else:
            limit = 76
        if subject_len >= limit:
            self.error(
                'First line of commit message (subject line) is too long (%d >= %d).' %
                (subject_len, limit)
                )

        if len(subject_line.strip()) == 0:
            self.error('First line of commit message (subject line) ' +
                       'is empty.')

        # lines[1] is the separator inserted above, so this cannot fire
        # today; it is retained in case the construction above changes.
        if lines[1].strip() != '':
            self.error('Second line of commit message should be ' +
                       'empty.')

        # Body lines start at message line 3.
        for number, body_line in enumerate(lines[2:], 3):
            if (len(body_line) >= 76 and
                len(body_line.split()) > 1 and
                not body_line.startswith('git-svn-id:')):
                #
                # Print a warning if body line is longer than 75 characters
                #
                print(
                    'WARNING - Line %d of commit message is too long (%d >= 76).' %
                    (number, len(body_line))
                    )
                print(body_line)

        # Walk upwards from the last line (never reaching the subject):
        # the run of signature lines must be preceded by an empty line.
        last_sig_line = None
        for line in reversed(lines[1:]):
            mo = self.sig_block_re.match(line)
            if mo is None:
                if line.strip() == '':
                    break
                elif last_sig_line is not None:
                    err2 = 'Add empty line before "%s"?' % last_sig_line
                    self.error('The line before the signature block ' +
                               'should be empty', err2)
                else:
                    self.error('The signature block was not found')
                break
            last_sig_line = line.strip()
293
# Parser states used by GitDiffCheck.
START = 0       # looking for the next 'diff --git' line
PRE_PATCH = 1   # reading per-file header lines, before a hunk or binary marker
PATCH = 2       # reading hunk or binary patch content
295
class GitDiffCheck:
    """Checks the contents of a git diff.

    The diff is parsed line by line by a three-state machine (START,
    PRE_PATCH, PATCH).  Every added line is checked for line ending, tabs,
    trailing white space and old-style EFI_D_* debug levels.  Afterwards
    ``ok`` is True when no error was reported.
    """

    # Header lines that may legitimately appear between 'diff --git' and the
    # first hunk.
    pre_patch_prefixes = (
        '--- ',
        '+++ ',
        'index ',
        'new file ',
        'deleted file ',
        'old mode ',
        'new mode ',
        'similarity index ',
        'copy from ',
        'copy to ',
        'rename ',
        )

    # Longest terminators first so CRLF is not mistaken for a bare LF/CR.
    line_endings = ('\r\n', '\n\r', '\n', '\r')

    # An 'index' line whose old blob id is all zeros.
    newfile_prefix_re = \
        re.compile(r'''^
                        index\ 0+\.\.
                    ''',
                   re.VERBOSE)

    old_debug_re = \
        re.compile(r'''
                        DEBUG \s* \( \s* \( \s*
                        (?: DEBUG_[A-Z_]+ \s* \| \s*)*
                        EFI_D_ ([A-Z_]+)
                   ''',
                   re.VERBOSE)

    split_diff_re = re.compile(r'''
                                   (?P<cmd>
                                       ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                                   )
                                   (?P<index>
                                       ^ index \s+ .+ $
                                   )
                               ''',
                               re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    def __init__(self, diff):
        """Parse and check `diff` (text of one or more git diffs)."""
        self.ok = True
        self.format_ok = True
        # Keep the line terminators: check_added_line() inspects them.
        self.lines = diff.splitlines(True)
        self.count = len(self.lines)
        self.line_num = 0
        self.state = START
        self.new_bin = []
        # added_line_error() tests this against None, so make sure it exists
        # before any 'diff --git' line has been seen.
        self.filename = None
        while self.line_num < self.count and self.format_ok:
            line_num = self.line_num
            self.run()
            # Every step must consume at least one line.
            assert(self.line_num > line_num)
        self.report_message_result()

    def report_message_result(self):
        """Print the verdict and any newly added binary files."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The code passed all checks.')
        if self.new_bin:
            print('\nWARNING - The following binary files will be added ' +
                  'into the repository:')
            for binary in self.new_bin:
                print(' ' + binary)

    def run(self):
        """Process the line at ``self.line_num`` and advance past it."""
        line = self.lines[self.line_num]

        # A new 'diff --git' line always starts the next file.
        if self.state in (PRE_PATCH, PATCH):
            if line.startswith('diff --git'):
                self.state = START
        if self.state == PATCH:
            if line.startswith('@@ '):
                self.state = PRE_PATCH
            elif len(line) >= 1 and line[0] not in ' -+' and \
                 not line.startswith('\r\n') and \
                 not line.startswith(r'\ No newline ') and not self.binary:
                # Anything else ends the patch; no further diff may follow.
                for later in self.lines[self.line_num + 1:]:
                    if later.startswith('diff --git'):
                        self.format_error('diff found after end of patch')
                        break
                self.line_num = self.count
                return

        if self.state == START:
            if line.startswith('diff --git'):
                self.state = PRE_PATCH
                # Skip the 13 characters of 'diff --git a/'.
                self.filename = line[13:].split(' ', 1)[0]
                self.is_newfile = False
                self.force_crlf = True
                self.force_notabs = True
                if self.filename.endswith('.sh'):
                    #
                    # Do not enforce CR/LF line endings for linux shell scripts.
                    #
                    self.force_crlf = False
                if self.filename == '.gitmodules':
                    #
                    # .gitmodules is updated by git and uses tabs and LF line
                    # endings.  Do not enforce no tabs and do not enforce
                    # CR/LF line endings.
                    #
                    self.force_crlf = False
                    self.force_notabs = False
            elif len(line.rstrip()) != 0:
                self.format_error("didn't find diff command")
            self.line_num += 1
        elif self.state == PRE_PATCH:
            if line.startswith('@@ '):
                self.state = PATCH
                self.binary = False
            elif line.startswith(('GIT binary patch', 'Binary files')):
                self.state = PATCH
                self.binary = True
                if self.is_newfile:
                    self.new_bin.append(self.filename)
            elif line.startswith('new file mode 160000'):
                #
                # New submodule.  Do not enforce CR/LF line endings
                #
                self.force_crlf = False
            else:
                # Remember a new-file index line for the rest of this file's
                # header instead of letting later header lines clear it.
                if self.newfile_prefix_re.match(line):
                    self.is_newfile = True
                if not line.startswith(self.pre_patch_prefixes):
                    self.format_error("didn't find diff hunk marker (@@)")
            self.line_num += 1
        elif self.state == PATCH:
            if self.binary:
                pass
            elif line.startswith('-'):
                pass
            elif line.startswith('+'):
                self.check_added_line(line[1:])
            elif line.startswith('\r\n'):
                pass
            elif line.startswith(r'\ No newline '):
                pass
            elif not line.startswith(' '):
                self.format_error("unexpected patch line")
            self.line_num += 1

    def added_line_error(self, msg, line):
        """Report `msg` together with the file name and offending line."""
        lines = [ msg ]
        if self.filename is not None:
            lines.append('File: ' + self.filename)
        lines.append('Line: ' + line)

        self.error(*lines)

    def check_added_line(self, line):
        """Check one added line (leading '+' removed, terminator kept)."""
        eol = ''
        for an_eol in self.line_endings:
            if line.endswith(an_eol):
                eol = an_eol
                line = line[:-len(eol)]
                # Stop at the first (longest) match so that a shorter
                # terminator cannot overwrite the one just detected.
                break

        stripped = line.rstrip()

        if self.force_crlf and eol != '\r\n':
            self.added_line_error('Line ending (%s) is not CRLF' % repr(eol),
                                  line)
        if self.force_notabs and '\t' in line:
            self.added_line_error('Tab character used', line)
        if len(stripped) < len(line):
            self.added_line_error('Trailing whitespace found', line)

        mo = self.old_debug_re.search(line)
        if mo is not None:
            self.added_line_error('EFI_D_' + mo.group(1) + ' was used, '
                                  'but DEBUG_' + mo.group(1) +
                                  ' is now recommended', line)

    def format_error(self, err):
        """Report a structural problem; this also stops further parsing."""
        self.format_ok = False
        err = 'Patch format error: ' + err
        err2 = 'Line: ' + self.lines[self.line_num].rstrip()
        self.error(err, err2)

    def error(self, *err):
        """Mark the check as failed and print the given message lines."""
        if self.ok and Verbose.level > Verbose.ONELINE:
            # Heading is printed only once, in front of the first error.
            print('Code format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # First line gets a bullet, continuation lines do not.
            prefix = ' ' if index else ' *'
            print(prefix, line)
493
class CheckOnePatch:
    """Checks the contents of a git email formatted patch.

    Various checks are performed on both the commit message and the
    patch content.
    """

    # First line of a git diff; everything from here on is patch content.
    git_diff_re = re.compile(r'''
                                 ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                             ''',
                             re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Splits the text in front of the diff into the commit message and the
    # '---' diffstat section.
    stat_re = \
        re.compile(r'''
                       (?P<commit_message> [\s\S\r\n]* )
                       (?P<stat>
                           ^ --- $ [\r\n]+
                           (?: ^ \s+ .+ \s+ \| \s+ \d+ \s+ \+* \-*
                               $ [\r\n]+ )+
                           [\s\S\r\n]+
                       )
                   ''',
                   re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Leading '[...]' groups of a subject line, e.g. '[PATCH v2 1/3]'.
    subject_prefix_re = \
        re.compile(r'''^
                       \s* (\[
                        [^\[\]]* # Allow all non-brackets
                       \])* \s*
                   ''',
                   re.VERBOSE)

    def __init__(self, name, patch):
        """Check `patch` and record the overall result in ``self.ok``.

        name:  label printed in the one-line summary.
        patch: patch text, either a git formatted email or a bare diff.
        """
        self.patch = patch
        self.find_patch_pieces()

        email_check = EmailAddressCheck(self.author_email, 'Author')
        email_ok = email_check.ok

        msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg)
        msg_ok = msg_check.ok

        diff_ok = True
        if self.diff is not None:
            diff_check = GitDiffCheck(self.diff)
            diff_ok = diff_check.ok

        self.ok = email_ok and msg_ok and diff_ok

        if Verbose.level == Verbose.ONELINE:
            if self.ok:
                result = 'ok'
            else:
                # Name every failed part; without the email entry a failure
                # caused only by the author address printed a bare 'bad '.
                failed = []
                if not email_ok:
                    failed.append('author email')
                if not msg_ok:
                    failed.append('commit message')
                if not diff_ok:
                    failed.append('diff content')
                result = 'bad ' + ' and '.join(failed)
            print(name, result)

    def find_patch_pieces(self):
        """Split ``self.patch`` into author, subject, message, stat and diff.

        Sets author_email, commit_subject, commit_msg, stat and diff.  For a
        bare diff (text starting with 'diff --git') only ``diff`` is set and
        the other attributes stay None.
        """
        # Imported here so that the email.header submodule is loaded
        # explicitly instead of relying on a side effect of other imports.
        from email.header import decode_header

        if sys.version_info < (3, 0):
            patch = self.patch.encode('ascii', 'ignore')
        else:
            patch = self.patch

        self.commit_msg = None
        self.stat = None
        self.commit_subject = None
        self.commit_prefix = None
        self.diff = None
        # Must exist even on the early return below; __init__ reads it.
        self.author_email = None

        if patch.startswith('diff --git'):
            self.diff = patch
            return

        pmail = email.message_from_string(patch)
        parts = list(pmail.walk())
        # Only single-part, plain text mails are supported.
        assert(len(parts) == 1)
        assert(parts[0].get_content_type() == 'text/plain')
        content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore')

        mo = self.git_diff_re.search(content)
        if mo is not None:
            self.diff = content[mo.start():]
            content = content[:mo.start()]

        mo = self.stat_re.search(content)
        if mo is None:
            self.commit_msg = content
        else:
            self.stat = mo.group('stat')
            self.commit_msg = mo.group('commit_message')
        #
        # Parse subject line from email header.  The subject line may be
        # composed of multiple parts with different encodings.  Decode and
        # combine all the parts to produce a single string with the contents of
        # the decoded subject line.  A missing Subject header is treated as
        # an empty subject.
        #
        subject = ''
        for (part, encoding) in decode_header(pmail.get('subject', '')):
            if isinstance(part, bytes):
                try:
                    part = part.decode(encoding or 'utf-8', 'replace')
                except LookupError:
                    # Charset name unknown to Python.
                    part = part.decode('utf-8', 'replace')
            subject = subject + part

        # Remove header folding, then the leading '[PATCH ...]' prefixes.
        subject = subject.replace('\r\n', '').replace('\n', '')
        self.commit_subject = self.subject_prefix_re.sub('', subject, 1)

        self.author_email = pmail['from']
612
class CheckGitCommits:
    """Reads patches from git based on the specified git revision range.

    The patches are read from git, and then checked.  ``ok`` ends up False
    as soon as any committer address or patch fails its check.
    """

    def __init__(self, rev_spec, max_count):
        """Check the commits selected by `rev_spec`.

        rev_spec:  revision specification handed to 'git rev-list'.
        max_count: optional upper bound on the number of commits, or None.
        """
        verbose = Verbose.level > Verbose.ONELINE
        commits = self.read_commit_list_from_git(rev_spec, max_count)
        if verbose and len(commits) == 1:
            # Show the name the user typed instead of the abbreviated hash.
            commits = [ rev_spec ]
        self.ok = True
        for index, commit in enumerate(commits):
            if verbose:
                # Separate the reports of consecutive commits.
                if index > 0:
                    print()
                print('Checking git commit:', commit)
            committer = self.read_committer_email_address_from_git(commit)
            self.ok &= EmailAddressCheck(committer, 'Committer').ok
            patch_text = self.read_patch_from_git(commit)
            self.ok &= CheckOnePatch(commit, patch_text).ok
        if not commits:
            print("Couldn't find commit matching: '{}'".format(rev_spec))

    def read_commit_list_from_git(self, rev_spec, max_count):
        """Return the abbreviated commit ids selected by `rev_spec`."""
        git_args = [ 'rev-list', '--abbrev-commit', '--no-walk' ]
        if max_count is not None:
            git_args.append('--max-count=' + str(max_count))
        git_args.append(rev_spec)
        listing = self.run_git(*git_args)
        if not listing:
            return []
        return listing.split()

    def read_patch_from_git(self, commit):
        """Return `commit` rendered by git as an email formatted patch."""
        return self.run_git('show', '--pretty=email', '--no-textconv',
                            '--no-use-mailmap', commit)

    def read_committer_email_address_from_git(self, commit):
        """Return the committer of `commit` as 'Name <address>' text."""
        return self.run_git('show', '--pretty=%cn <%ce>', '--no-patch',
                            '--no-use-mailmap', commit)

    def run_git(self, *args):
        """Run git with `args` and return its decoded output.

        stderr is merged into stdout.  None is returned when git printed
        nothing or when the output begins with 'fatal'.
        """
        command = [ 'git' ] + list(args)
        proc = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        output = proc.communicate()[0]
        if not output or output.startswith(b"fatal"):
            return None
        return output.decode('utf-8', 'ignore')
666
class CheckOnePatchFile:
    """Performs a patch check for a single file.

    stdin is used when the filename is '-'.  The result of the check is
    stored in ``ok``.
    """

    def __init__(self, patch_filename):
        """Read the patch from `patch_filename` (or stdin) and check it."""
        if patch_filename == '-':
            patch = sys.stdin.read()
            patch_filename = 'stdin'
        else:
            # Read as bytes and decode leniently.  The context manager
            # closes the file even if reading or decoding raises.
            with open(patch_filename, 'rb') as f:
                patch = f.read().decode('utf-8', 'ignore')
        if Verbose.level > Verbose.ONELINE:
            print('Checking patch file:', patch_filename)
        self.ok = CheckOnePatch(patch_filename, patch).ok
684
class CheckOneArg:
    """Performs a patch check for a single command line argument.

    The argument will be handed off to a file or git-commit based
    checker, whose verdict is copied into ``ok``.
    """

    def __init__(self, param, max_count=None):
        """Check `param`.

        param:     '-' (stdin), a path to an existing patch file, or
                   otherwise a git revision specification.
        max_count: commit limit, only used for git revisions.
        """
        self.ok = True
        is_patch_file = (param == '-') or os.path.exists(param)
        if is_patch_file:
            delegate = CheckOnePatchFile(param)
        else:
            delegate = CheckGitCommits(param, max_count)
        self.ok = delegate.ok
699
class PatchCheckApp:
    """Checks patches based on the command line arguments.

    After construction ``retval`` holds the process exit value: 0 when
    every check passed and -1 otherwise.
    """

    def __init__(self):
        """Parse the command line and check every requested patch."""
        self.parse_options()
        # With no arguments the current HEAD commit is checked.
        targets = self.args.patches or [ 'HEAD' ]

        self.ok = True
        self.count = None
        for target in targets:
            self.process_one_arg(target)

        # A trailing '-N' with nothing after it applies to HEAD.
        if self.count is not None:
            self.process_one_arg('HEAD')

        self.retval = 0 if self.ok else -1

    def process_one_arg(self, arg):
        """Handle one positional argument.

        An argument of the form '-N' is not checked itself; N is stored and
        passed as the commit limit when the next argument is checked.
        """
        if len(arg) >= 2 and arg[0] == '-':
            try:
                limit = int(arg[1:])
            except ValueError:
                # Not a number after all: treat it like any other argument.
                pass
            else:
                self.count = limit
                return
        self.ok &= CheckOneArg(arg, self.count).ok
        self.count = None

    def parse_options(self):
        """Parse sys.argv into ``self.args`` and set the verbosity level."""
        parser = argparse.ArgumentParser(description=__copyright__)
        parser.add_argument('--version', action='version',
                            version='%(prog)s ' + VersionNumber)
        parser.add_argument('patches', nargs='*',
                            help='[patch file | git rev list]')
        # --oneline and --silent cannot be combined.
        quiet_opts = parser.add_mutually_exclusive_group()
        quiet_opts.add_argument("--oneline",
                                action="store_true",
                                help="Print one result per line")
        quiet_opts.add_argument("--silent",
                                action="store_true",
                                help="Print nothing")
        self.args = parser.parse_args()
        if self.args.silent:
            Verbose.level = Verbose.SILENT
        elif self.args.oneline:
            Verbose.level = Verbose.ONELINE
751
# Script entry point: run the checks and use the result as exit status.
if __name__ == "__main__":
    app = PatchCheckApp()
    sys.exit(app.retval)