]> git.proxmox.com Git - mirror_edk2.git/blob - BaseTools/Scripts/PatchCheck.py
173d4517e0a80d5e62b12c1c9b9b798e567f7d58
[mirror_edk2.git] / BaseTools / Scripts / PatchCheck.py
1 ## @file
2 # Check a patch for various format issues
3 #
4 # Copyright (c) 2015 - 2020, Intel Corporation. All rights reserved.<BR>
5 # Copyright (C) 2020, Red Hat, Inc.<BR>
6 #
7 # SPDX-License-Identifier: BSD-2-Clause-Patent
8 #
9
10 from __future__ import print_function
11
# Version string reported by the --version command line option.
VersionNumber = "0.1"
# Copyright banner; also used as the argparse description text.
__copyright__ = (
    "Copyright (c) 2015 - 2016, Intel Corporation All rights reserved."
)
14
15 import email
16 import argparse
17 import os
18 import re
19 import subprocess
20 import sys
21
class Verbose:
    """Output verbosity levels shared by the checkers in this script."""

    # Levels are ordered from quietest to most talkative; the checkers
    # compare them with '<' and '>'.
    SILENT = 0
    ONELINE = 1
    NORMAL = 2

    # Currently selected verbosity level.
    level = NORMAL
25
class EmailAddressCheck:
    """Checks an email address.

    The address is expected in the form 'Name <address>'.  Problems are
    reported through error() and the overall verdict is left in self.ok.
    """

    # Splits 'Name <address>' into three groups: the name, the white space
    # between the name and '<', and the text between the angle brackets.
    email_re1 = re.compile(r'(?:\s*)(.*?)(\s*)<(.+)>\s*$',
                           re.MULTILINE|re.IGNORECASE)

    def __init__(self, email, description):
        """Check 'email' and record the result in self.ok.

        email       -- the 'Name <address>' string to check, or None.
        description -- label used in the report header (for example
                       'Author'), or None.
        """
        self.ok = True

        # error() builds its report header from self.description, so the
        # attribute has to exist before the first error is reported; this
        # includes the two "missing" errors below.
        if description is None:
            self.description = 'unnamed'
        else:
            self.description = "'" + description + "'"

        if email is None:
            self.error('Email address is missing!')
            return
        if description is None:
            self.error('Email description is missing!')
            return

        self.check_email_address(email)

    def error(self, *err):
        """Mark the check as failed and print the given message lines.

        The header is printed once, before the first error, and only when
        the verbosity is above ONELINE.  The message lines themselves are
        printed only at NORMAL verbosity.
        """
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('The ' + self.description + ' email address is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # The first line gets a bullet, continuation lines do not.
            prefix = ' *' if index == 0 else ' '
            print(prefix, line)

    def check_email_address(self, email):
        """Validate the name and address parts of a 'Name <address>' string."""
        email = email.strip()
        mo = self.email_re1.match(email)
        if mo is None:
            self.error("Email format is invalid: " + email)
            return

        name = mo.group(1).strip()
        if name == '':
            self.error("Name is not provided with email address: " +
                       email)
        else:
            # A name containing a comma must be wrapped in double quotes.
            quoted = len(name) > 2 and name[0] == '"' and name[-1] == '"'
            if ',' in name and not quoted:
                self.error('Add quotes (") around name with a comma: ' +
                           name)

        if mo.group(2) == '':
            self.error("There should be a space between the name and " +
                       "email address: " + email)

        if ' ' in mo.group(3):
            self.error("The email address cannot contain a space: " +
                       mo.group(3))
81
class CommitMessageCheck:
    """Checks the contents of a git commit message.

    The subject line and the message body are checked for the required
    signature tags and for the overall layout (line lengths, empty second
    line, signature block at the end).  The verdict is left in self.ok.
    """

    # Page printed when the commit message fails the checks.
    url = 'https://github.com/tianocore/tianocore.github.io/wiki/Commit-Message-Format'

    # Find 'contributed-under:' at the start of a line ignoring case and
    # requires ':' to be present.  Matches if there is white space before
    # the tag or between the tag and the ':'.
    contributed_under_re = \
        re.compile(r'^\s*contributed-under\s*:', re.MULTILINE|re.IGNORECASE)

    # Matches one line of the trailing signature block: either
    # 'tag: value' or a bracketed '[updater: note]' line.
    sig_block_re = \
        re.compile(r'''^
                        (?: (?P<tag>[^:]+) \s* : \s*
                            (?P<value>\S.*?) )
                            |
                        (?: \[ (?P<updater>[^:]+) \s* : \s*
                               (?P<note>.+?) \s* \] )
                    \s* $''',
                   re.VERBOSE | re.MULTILINE)

    # Signature tags, other than Signed-off-by, whose format is checked.
    # find_signatures() appends '-by' to every entry except 'Cc'.
    sig_types = (
        'Reviewed',
        'Reported',
        'Tested',
        'Suggested',
        'Acked',
        'Cc'
        )

    # A CVE identifier followed by a non-digit character.
    cve_re = re.compile('CVE-[0-9]{4}-[0-9]{5}[^0-9]')

    def __init__(self, subject, message):
        """Run all commit message checks.

        subject -- the subject line, or None.
        message -- the message body, or None.
        """
        self.ok = True

        if subject is None and message is None:
            self.error('Commit message is missing!')
            return

        # When only one of the two pieces is missing, check what is there
        # instead of failing later on a None value.
        self.subject = subject if subject is not None else ''
        self.msg = message if message is not None else ''

        # Echo the subject only in normal mode, so that --silent stays
        # silent and --oneline keeps printing one line per patch.
        if Verbose.level > Verbose.ONELINE:
            print(self.subject)

        self.check_contributed_under()
        self.check_signed_off_by()
        self.check_misc_signatures()
        self.check_overall_format()
        self.report_message_result()

    def report_message_result(self):
        """Print the final verdict (NORMAL verbosity only)."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            # All checks passed
            print('The commit message format passed all checks.')
        else:
            print(self.url)

    def error(self, *err):
        """Mark the check as failed and print the given message lines.

        The header is printed once, before the first error, and only when
        the verbosity is above ONELINE.  The message lines themselves are
        printed only at NORMAL verbosity.
        """
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('The commit message format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # The first line gets a bullet, continuation lines do not.
            prefix = ' *' if index == 0 else ' '
            print(prefix, line)

    def check_contributed_under(self):
        """Report an error when a Contributed-under tag is present."""
        match = self.contributed_under_re.search(self.msg)
        if match is not None:
            self.error('Contributed-under! (Note: this must be ' +
                       'removed by the code contributor!)')

    @staticmethod
    def make_signature_re(sig, re_input=False):
        """Build the regular expression that finds 'sig: value' lines.

        sig      -- the tag text, or a regular expression fragment when
                    re_input is true.
        re_input -- when false, each '-' in sig also matches white space
                    so that misspelled variants of the tag are found.

        The groups of the returned expression are, in order: the tag, the
        white space before ':', the white space after ':', and the value.
        """
        if re_input:
            sub_re = sig
        else:
            sub_re = sig.replace('-', r'[-\s]+')
        re_str = (r'^(?P<tag>' + sub_re +
                  r')(\s*):(\s*)(?P<value>\S.*?)(?:\s*)$')
        try:
            return re.compile(re_str, re.MULTILINE|re.IGNORECASE)
        except Exception:
            print("Tried to compile re:", re_str)
            raise

    def find_signatures(self, sig):
        """Check every 'sig' line in the message and return the matches.

        '-by' is appended to sig unless it already ends with '-by' or is
        'Cc'.  Each match is a tuple (tag, space before ':', space after
        ':', value); the value is checked as an email address.
        """
        if not sig.endswith('-by') and sig != 'Cc':
            sig += '-by'
        regex = self.make_signature_re(sig)

        sigs = regex.findall(self.msg)

        # The expression is case-insensitive and tolerant of white space
        # in place of '-', so flag every tag that is not spelled exactly.
        for found in sigs:
            if found[0] != sig:
                self.error("'" + found[0] + "' should be '" + sig + "'")

        for found in sigs:
            if found[1] != '':
                self.error('There should be no spaces between ' + sig +
                           " and the ':'")
            if found[2] != ' ':
                self.error("There should be a space after '" + sig + ":'")

            EmailAddressCheck(found[3], sig)

        return sigs

    def check_signed_off_by(self):
        """Require at least one well-formed Signed-off-by line."""
        sob = 'Signed-off-by'
        if self.msg.find(sob) < 0:
            self.error('Missing Signed-off-by! (Note: this must be ' +
                       'added by the code contributor!)')
            return

        sobs = self.find_signatures('Signed-off')

        if len(sobs) == 0:
            self.error('Invalid Signed-off-by format!')
            return

    def check_misc_signatures(self):
        """Check the format of every tag listed in sig_types."""
        for sig in self.sig_types:
            self.find_signatures(sig)

    def check_overall_format(self):
        """Check line lengths, the empty second line and the signature block."""
        # Line 1 is the subject and line 2 the separator; the body follows.
        # The subject keeps a trailing newline because cve_re needs one
        # character after the CVE number.
        lines = [self.subject + '\n', '\n'] + self.msg.splitlines()
        count = len(lines)

        subject_len = len(lines[0].rstrip())
        if re.search(self.cve_re, lines[0]):
            #
            # If CVE-xxxx-xxxxx is present in subject line, then limit length of
            # subject line to 92 characters
            #
            if subject_len >= 93:
                self.error(
                    'First line of commit message (subject line) is too long (%d >= 93).' %
                    subject_len
                    )
        else:
            #
            # If CVE-xxxx-xxxxx is not present in subject line, then limit
            # length of subject line to 75 characters
            #
            if subject_len >= 76:
                self.error(
                    'First line of commit message (subject line) is too long (%d >= 76).' %
                    subject_len
                    )

        if len(lines[0].strip()) == 0:
            self.error('First line of commit message (subject line) ' +
                       'is empty.')

        if count >= 2 and lines[1].strip() != '':
            self.error('Second line of commit message should be ' +
                       'empty.')

        for number, line in enumerate(lines[2:], start=3):
            if (len(line) >= 76 and
                len(line.split()) > 1 and
                not line.startswith('git-svn-id:')):
                #
                # Print a warning if body line is longer than 75 characters.
                # A warning does not fail the check; it is shown only at
                # normal verbosity.
                #
                if Verbose.level >= Verbose.NORMAL:
                    print(
                        'WARNING - Line %d of commit message is too long (%d >= 76).' %
                        (number, len(line))
                        )
                    print(line)

        # Walk upwards from the last line (the subject is excluded): the
        # signature block must be preceded by an empty line.
        last_sig_line = None
        for line in reversed(lines[1:]):
            mo = self.sig_block_re.match(line)
            if mo is None:
                if line.strip() == '':
                    break
                elif last_sig_line is not None:
                    err2 = 'Add empty line before "%s"?' % last_sig_line
                    self.error('The line before the signature block ' +
                               'should be empty', err2)
                else:
                    self.error('The signature block was not found')
                break
            last_sig_line = line.strip()
289
# Parser states used by GitDiffCheck.
START = 0       # looking for a 'diff --git' line
PRE_PATCH = 1   # inside the per-file header, before the first hunk
PATCH = 2       # inside hunk (or binary patch) content
291
class GitDiffCheck:
    """Checks the contents of a git diff.

    The diff is walked line by line with a small state machine (START,
    PRE_PATCH, PATCH).  Added lines are checked for line endings, tabs,
    trailing white space and old-style EFI_D_* debug levels.  The verdict
    is left in self.ok; self.format_ok is cleared when the diff itself
    cannot be parsed.
    """

    # Header lines that may appear between 'diff --git' and the first hunk.
    pre_patch_prefixes = (
        '--- ',
        '+++ ',
        'index ',
        'new file ',
        'deleted file ',
        'old mode ',
        'new mode ',
        'similarity index ',
        'copy from ',
        'copy to ',
        'rename ',
        )

    # Line terminators recognised on added lines, tried in this order.
    line_endings = ('\r\n', '\n\r', '\n', '\r')

    # An 'index' header whose old blob id consists only of zeros.
    newfile_prefix_re = \
        re.compile(r'''^
                        index\ 0+\.\.
                    ''',
                   re.VERBOSE)

    # A DEBUG (( ... EFI_D_xxx use; group 1 is the level name after EFI_D_.
    old_debug_re = \
        re.compile(r'''
                        DEBUG \s* \( \s* \( \s*
                        (?: DEBUG_[A-Z_]+ \s* \| \s*)*
                        EFI_D_ ([A-Z_]+)
                   ''',
                   re.VERBOSE)

    # Not referenced by the code in this class; kept as a class attribute.
    split_diff_re = re.compile(r'''
                                   (?P<cmd>
                                       ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                                   )
                                   (?P<index>
                                       ^ index \s+ .+ $
                                   )
                               ''',
                               re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    def __init__(self, diff):
        """Check the diff text 'diff' and print the result."""
        self.ok = True
        self.format_ok = True
        self.lines = diff.splitlines(True)
        self.count = len(self.lines)
        self.line_num = 0
        self.state = START
        self.new_bin = []
        # Per-file state.  run() overwrites these when it reaches a
        # 'diff --git' header; they are created here so that every method
        # can rely on them existing.
        self.filename = None
        self.is_newfile = False
        self.force_crlf = True
        self.binary = False
        while self.line_num < self.count and self.format_ok:
            line_num = self.line_num
            self.run()
            # Every step has to consume at least one line.
            assert(self.line_num > line_num)
        self.report_message_result()

    def report_message_result(self):
        """Print the final verdict and any new binary files (NORMAL only)."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The code passed all checks.')
        if self.new_bin:
            print('\nWARNING - The following binary files will be added ' +
                  'into the repository:')
            for binary in self.new_bin:
                print(' ' + binary)

    def run(self):
        """Process the line at self.line_num and advance the position."""
        line = self.lines[self.line_num]

        # A new 'diff --git' line always starts the next file.
        if self.state in (PRE_PATCH, PATCH):
            if line.startswith('diff --git'):
                self.state = START
        if self.state == PATCH:
            if line.startswith('@@ '):
                self.state = PRE_PATCH
            elif len(line) >= 1 and line[0] not in ' -+' and \
                 not line.startswith(('\r\n', r'\ No newline ')) and \
                 not self.binary:
                # This line cannot belong to a hunk, so the patch ends
                # here; a later 'diff --git' line is then an error.
                for later in self.lines[self.line_num + 1:]:
                    if later.startswith('diff --git'):
                        self.format_error('diff found after end of patch')
                        break
                self.line_num = self.count
                return

        if self.state == START:
            if line.startswith('diff --git'):
                self.state = PRE_PATCH
                # Text after 'diff --git a/' up to the next space.
                self.filename = line[13:].split(' ', 1)[0]
                self.is_newfile = False
                self.force_crlf = not self.filename.endswith('.sh')
            elif len(line.rstrip()) != 0:
                self.format_error("didn't find diff command")
            self.line_num += 1
        elif self.state == PRE_PATCH:
            if line.startswith('@@ '):
                self.state = PATCH
                self.binary = False
            elif line.startswith(('GIT binary patch', 'Binary files')):
                self.state = PATCH
                self.binary = True
                if self.is_newfile:
                    self.new_bin.append(self.filename)
            else:
                # Re-evaluated on every header line, so the value seen by
                # the binary check above comes from the line before it.
                self.is_newfile = \
                    self.newfile_prefix_re.match(line) is not None
                if not line.startswith(self.pre_patch_prefixes):
                    self.format_error("didn't find diff hunk marker (@@)")
            self.line_num += 1
        elif self.state == PATCH:
            if not self.binary:
                if line.startswith('+'):
                    self.check_added_line(line[1:])
                elif not line.startswith(('-', '\r\n', r'\ No newline ',
                                          ' ')):
                    self.format_error("unexpected patch line")
            self.line_num += 1

    def added_line_error(self, msg, line):
        """Report 'msg' for an added line, with the file name if known."""
        lines = [ msg ]
        if self.filename is not None:
            lines.append('File: ' + self.filename)
        lines.append('Line: ' + line)

        self.error(*lines)

    def check_added_line(self, line):
        """Check one added line; 'line' excludes the leading '+'."""
        eol = ''
        for an_eol in self.line_endings:
            if line.endswith(an_eol):
                eol = an_eol
                line = line[:-len(eol)]
                # A line has a single terminator; stop at the first match.
                break

        stripped = line.rstrip()

        if self.force_crlf and eol != '\r\n':
            self.added_line_error('Line ending (%s) is not CRLF' % repr(eol),
                                  line)
        if '\t' in line:
            self.added_line_error('Tab character used', line)
        if len(stripped) < len(line):
            self.added_line_error('Trailing whitespace found', line)

        mo = self.old_debug_re.search(line)
        if mo is not None:
            self.added_line_error('EFI_D_' + mo.group(1) + ' was used, '
                                  'but DEBUG_' + mo.group(1) +
                                  ' is now recommended', line)

    def format_error(self, err):
        """Report a diff that cannot be parsed; this stops the walk."""
        self.format_ok = False
        err = 'Patch format error: ' + err
        err2 = 'Line: ' + self.lines[self.line_num].rstrip()
        self.error(err, err2)

    def error(self, *err):
        """Mark the check as failed and print the given message lines.

        The header is printed once, before the first error, and only when
        the verbosity is above ONELINE.  The message lines themselves are
        printed only at NORMAL verbosity.
        """
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('Code format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            # The first line gets a bullet, continuation lines do not.
            prefix = ' *' if index == 0 else ' '
            print(prefix, line)
470
class CheckOnePatch:
    """Checks the contents of a git email formatted patch.

    Various checks are performed on both the commit message and the
    patch content.
    """

    # Finds the first 'diff --git a/... b/...' line of the patch.
    git_diff_re = re.compile(r'''
                                 ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                             ''',
                             re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Splits the text before the diff into the commit message and the
    # diffstat section that starts with a '---' line.
    stat_re = \
        re.compile(r'''
                       (?P<commit_message> [\s\S\r\n]* )
                       (?P<stat>
                           ^ --- $ [\r\n]+
                           (?: ^ \s+ .+ \s+ \| \s+ \d+ \s+ \+* \-*
                               $ [\r\n]+ )+
                           [\s\S\r\n]+
                       )
                   ''',
                   re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Leading bracketed prefixes of a subject line, such as '[PATCH 1/2]'.
    subject_prefix_re = \
        re.compile(r'''^
                       \s* (\[
                        [^\[\]]* # Allow all non-brackets
                       \])* \s*
                   ''',
                   re.VERBOSE)

    def __init__(self, name, patch):
        """Check 'patch' and leave the verdict in self.ok.

        name  -- label printed in front of the result in --oneline mode.
        patch -- the patch text: a git formatted email or a bare diff.
        """
        self.patch = patch
        self.find_patch_pieces()

        email_check = EmailAddressCheck(self.author_email, 'Author')
        email_ok = email_check.ok

        msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg)
        msg_ok = msg_check.ok

        diff_ok = True
        if self.diff is not None:
            diff_check = GitDiffCheck(self.diff)
            diff_ok = diff_check.ok

        self.ok = email_ok and msg_ok and diff_ok

        if Verbose.level == Verbose.ONELINE:
            if self.ok:
                result = 'ok'
            else:
                # Name every part that failed, the author email included,
                # so that the summary never reads just 'bad '.
                failed = []
                if not email_ok:
                    failed.append('author email')
                if not msg_ok:
                    failed.append('commit message')
                if not diff_ok:
                    failed.append('diff content')
                result = 'bad ' + ' and '.join(failed)
            print(name, result)

    def find_patch_pieces(self):
        """Split self.patch into its pieces.

        Sets self.author_email, self.commit_subject, self.commit_msg,
        self.stat and self.diff.  A piece that is not present in the
        input is left as None; a bare diff therefore sets only self.diff.
        """
        if sys.version_info < (3, 0):
            patch = self.patch.encode('ascii', 'ignore')
        else:
            patch = self.patch

        self.commit_msg = None
        self.stat = None
        self.commit_subject = None
        self.commit_prefix = None
        self.diff = None
        # Also reset here, so that the attribute exists when the input is
        # a bare diff and the method returns early below.
        self.author_email = None

        if patch.startswith('diff --git'):
            self.diff = patch
            return

        pmail = email.message_from_string(patch)
        parts = list(pmail.walk())
        assert(len(parts) == 1)
        assert(parts[0].get_content_type() == 'text/plain')
        content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore')

        mo = self.git_diff_re.search(content)
        if mo is not None:
            self.diff = content[mo.start():]
            content = content[:mo.start()]

        mo = self.stat_re.search(content)
        if mo is None:
            self.commit_msg = content
        else:
            self.stat = mo.group('stat')
            self.commit_msg = mo.group('commit_message')
        #
        # Parse subject line from email header.  The subject line may be
        # composed of multiple parts with different encodings.  Decode and
        # combine all the parts to produce a single string with the contents of
        # the decoded subject line.  A missing Subject header gives an empty
        # subject, which the commit message check reports.
        #
        raw_subject = pmail.get('subject')
        if raw_subject is None:
            parts = []
        else:
            parts = email.header.decode_header(raw_subject)
        subject = ''
        for (part, encoding) in parts:
            # Parts arrive either as text or as bytes; only bytes need to
            # be decoded.
            if isinstance(part, bytes):
                part = part.decode(encoding or 'utf-8', 'ignore')
            subject = subject + part

        subject = subject.replace('\r\n', '').replace('\n', '')
        self.commit_subject = self.subject_prefix_re.sub('', subject, 1)

        self.author_email = pmail['from']
589
class CheckGitCommits:
    """Reads patches from git based on the specified git revision range.

    The patches are read from git, and then checked.
    """

    def __init__(self, rev_spec, max_count):
        """Check the commits selected by 'rev_spec'.

        rev_spec  -- revision or revision range handed to 'git rev-list'.
        max_count -- upper bound on the number of commits, or None.

        The combined verdict is left in self.ok.
        """
        commits = self.read_commit_list_from_git(rev_spec, max_count)
        if len(commits) == 1 and Verbose.level > Verbose.ONELINE:
            # Show the revision as the user wrote it.
            commits = [ rev_spec ]
        self.ok = True
        blank_line = False
        for commit in commits:
            if Verbose.level > Verbose.ONELINE:
                # Separate the reports of consecutive commits.
                if blank_line:
                    print()
                else:
                    blank_line = True
                print('Checking git commit:', commit)
            committer = self.read_committer_email_address_from_git(commit)
            self.ok &= EmailAddressCheck(committer, 'Committer').ok
            patch = self.read_patch_from_git(commit)
            if patch is None:
                # git could not produce the patch, so there is nothing to
                # hand to the patch checker.
                self.ok = False
                if Verbose.level > Verbose.ONELINE:
                    print("Couldn't read patch for commit:", commit)
                continue
            self.ok &= CheckOnePatch(commit, patch).ok
        if not commits:
            print("Couldn't find commit matching: '{}'".format(rev_spec))

    def read_commit_list_from_git(self, rev_spec, max_count):
        """Return the abbreviated commit ids selected by 'rev_spec'."""
        # Run git to get the list of commits
        cmd = [ 'rev-list', '--abbrev-commit', '--no-walk' ]
        if max_count is not None:
            cmd.append('--max-count=' + str(max_count))
        cmd.append(rev_spec)
        out = self.run_git(*cmd)
        return out.split() if out else []

    def read_patch_from_git(self, commit):
        """Return 'commit' as an email formatted patch, or None on failure."""
        # Run git to get the commit patch
        return self.run_git('show', '--pretty=email', '--no-textconv', commit)

    def read_committer_email_address_from_git(self, commit):
        """Return 'Name <email>' of the committer, or None on failure."""
        # Run git to get the committer email
        return self.run_git('show', '--pretty=%cn <%ce>', '--no-patch', commit)

    def run_git(self, *args):
        """Run 'git' with 'args' and return its decoded output.

        stderr is merged into stdout.  None is returned when git exits
        with a non-zero status, prints nothing, or its output starts with
        'fatal'.
        """
        cmd = [ 'git' ]
        cmd += args
        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        output = p.communicate()[0]
        # The exit status is the primary failure signal; the 'fatal'
        # prefix test is kept as a second line of defence.
        if p.returncode != 0 or not output or output.startswith(b"fatal"):
            return None
        return output.decode('utf-8', 'ignore')
641
class CheckOnePatchFile:
    """Performs a patch check for a single file.

    stdin is used when the filename is '-'.
    """

    def __init__(self, patch_filename):
        """Read the patch from 'patch_filename' and check it.

        The verdict is left in self.ok.
        """
        if patch_filename == '-':
            patch = sys.stdin.read()
            patch_filename = 'stdin'
        else:
            # The 'with' block closes the file even when reading or
            # decoding fails.
            with open(patch_filename, 'rb') as f:
                patch = f.read().decode('utf-8', 'ignore')
        if Verbose.level > Verbose.ONELINE:
            print('Checking patch file:', patch_filename)
        self.ok = CheckOnePatch(patch_filename, patch).ok
659
class CheckOneArg:
    """Performs a patch check for a single command line argument.

    The argument will be handed off to a file or git-commit based
    checker.
    """

    def __init__(self, param, max_count=None):
        """Check 'param' and leave the verdict in self.ok.

        param     -- '-' for stdin, the path of an existing file, or
                     otherwise a git revision specification.
        max_count -- passed to the git based checker only.
        """
        self.ok = True
        # '-' and existing paths are patch files; anything else is left
        # for git to interpret.
        is_patch_file = param == '-' or os.path.exists(param)
        if not is_patch_file:
            self.ok = CheckGitCommits(param, max_count).ok
        else:
            self.ok = CheckOnePatchFile(param).ok
674
class PatchCheckApp:
    """Checks patches based on the command line arguments."""

    def __init__(self):
        """Parse the command line, check every argument, set self.retval."""
        self.parse_options()

        # Without any argument the HEAD commit is checked.
        patches = self.args.patches or [ 'HEAD' ]

        self.ok = True
        self.count = None
        for patch in patches:
            self.process_one_arg(patch)

        # A trailing '-<count>' argument has no revision after it, so it
        # is applied to HEAD.
        if self.count is not None:
            self.process_one_arg('HEAD')

        self.retval = 0 if self.ok else -1

    def process_one_arg(self, arg):
        """Handle one positional argument.

        An argument of the form '-<number>' is stored as the commit count
        for the next argument; anything else is checked right away.
        """
        if len(arg) >= 2 and arg[0] == '-':
            try:
                number = int(arg[1:])
            except ValueError:
                # Not a count; treat it like any other argument.
                number = None
            if number is not None:
                self.count = number
                return
        checker = CheckOneArg(arg, self.count)
        self.ok &= checker.ok
        self.count = None

    def parse_options(self):
        """Parse sys.argv into self.args and select the verbosity level."""
        parser = argparse.ArgumentParser(description=__copyright__)
        parser.add_argument('--version', action='version',
                            version='%(prog)s ' + VersionNumber)
        parser.add_argument('patches', nargs='*',
                            help='[patch file | git rev list]')
        # --oneline and --silent cannot be combined.
        verbosity = parser.add_mutually_exclusive_group()
        verbosity.add_argument("--oneline",
                               action="store_true",
                               help="Print one result per line")
        verbosity.add_argument("--silent",
                               action="store_true",
                               help="Print nothing")
        self.args = parser.parse_args()
        if self.args.oneline:
            Verbose.level = Verbose.ONELINE
        if self.args.silent:
            Verbose.level = Verbose.SILENT
726
if __name__ == "__main__":
    # Run the checks and hand the result to the shell as the exit status.
    app = PatchCheckApp()
    sys.exit(app.retval)