]>
Commit | Line | Data |
---|---|---|
b3020464 TR |
1 | #!/usr/bin/env python |
2 | # SPDX-License-Identifier: GPL-2.0 | |
3 | # | |
4 | # Copyright (C) Google LLC, 2018 | |
5 | # | |
6 | # Author: Tom Roeder <tmroeder@google.com> | |
7 | # | |
8 | """A tool for generating compile_commands.json in the Linux kernel.""" | |
9 | ||
import argparse
import json
import logging
import os
import re
import subprocess
import sys
b3020464 TR |
16 | |
# Default value of the -o/--output option.
_DEFAULT_OUTPUT = 'compile_commands.json'
# Default value of the --log_level option.
_DEFAULT_LOG_LEVEL = 'WARNING'

# Matches the name of a .cmd file: starts with '.' and ends with '.cmd'.
_FILENAME_PATTERN = r'^\..*\.cmd$'
# Matches a 'cmd_<target>.o := <command> <source>.c' line. Group 1 is the
# command up to and including the space before the source file; group 2 is
# the path of the .c source file.
_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c)$'
# Values accepted by --log_level; each one is looked up as an attribute of
# the logging module.
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
23 | ||
b3020464 TR |
24 | |
def parse_arguments():
    """Declares the command-line interface and parses the arguments.

    Returns:
        A tuple of five values, in this order:
        log_level: A logging level to filter log output.
        directory: The work directory where the objects were built, as an
            absolute path.
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            When no path is given on the command line, this is a
            one-element list holding the --directory value as given.
    """
    parser = argparse.ArgumentParser(
        description=('Creates a compile_commands.json database from kernel '
                     '.cmd files'))

    parser.add_argument(
        '-d', '--directory', type=str, default='.',
        help=('specify the output directory used for the kernel build '
              '(defaults to the working directory)'))

    parser.add_argument(
        '-o', '--output', type=str, default=_DEFAULT_OUTPUT,
        help=('path to the output command database (defaults to ' +
              _DEFAULT_OUTPUT + ')'))

    parser.add_argument(
        '--log_level', choices=_VALID_LOG_LEVELS, default=_DEFAULT_LOG_LEVEL,
        help=('the level of log messages to produce (defaults to ' +
              _DEFAULT_LOG_LEVEL + ')'))

    parser.add_argument(
        '-a', '--ar', type=str, default='llvm-ar',
        help='command used for parsing .a archives')

    parser.add_argument(
        'paths', type=str, nargs='*',
        help=('directories to search or files to parse '
              '(files should be *.o, *.a, or modules.order). '
              'If nothing is specified, the current directory is searched'))

    options = parser.parse_args()

    # Fall back to the build directory when no positional path was given.
    search_paths = options.paths
    if not search_paths:
        search_paths = [options.directory]

    build_directory = os.path.abspath(options.directory)
    return (options.log_level, build_directory, options.output, options.ar,
            search_paths)
fc2cb22e MY |
68 | |
69 | ||
def cmdfiles_in_dir(directory):
    """Generate the iterator of .cmd files found under the directory.

    Recursively walk the given directory and yield each file whose name
    matches _FILENAME_PATTERN.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """
    is_cmdfile = re.compile(_FILENAME_PATTERN).match

    for parent, _subdirs, names in os.walk(directory):
        for name in names:
            if not is_cmdfile(name):
                continue
            yield os.path.join(parent, name)
b3020464 TR |
88 | |
89 | ||
ecca4fea MY |
def to_cmdfile(path):
    """Return the path of the .cmd file used for the given build artifact.

    The .cmd file is placed in the same directory as the artifact and is
    named after it with a leading '.' and a trailing '.cmd', e.g.
    'dir/foo.o' maps to 'dir/.foo.o.cmd'.

    Args:
        path: The path of the build artifact.

    Returns:
        The path to the .cmd file.
    """
    # Named dirname/basename so the builtin dir() is not shadowed.
    dirname, basename = os.path.split(path)
    return os.path.join(dirname, '.' + basename + '.cmd')
101 | ||
102 | ||
def cmdfiles_for_o(obj):
    """Generate the iterator of .cmd files associated with the object.

    An object maps to exactly one .cmd file, so this yields a single path.

    Args:
        obj: The object path.

    Yields:
        The path to the .cmd file of the object.
    """
    cmdfile = to_cmdfile(obj)
    yield cmdfile
115 | ||
116 | ||
def cmdfiles_for_a(archive, ar):
    """Generate the iterator of .cmd files associated with the archive.

    Run '<ar> -t <archive>' to list the archive members, and yield the
    .cmd file of every member.

    Args:
        archive: The archive to parse.
        ar: Command used for parsing .a archives.

    Yields:
        The path to every .cmd file found.
    """
    listing = subprocess.check_output([ar, '-t', archive])
    # Member names are separated by whitespace in the decoded output.
    members = listing.decode().split()
    for member in members:
        yield to_cmdfile(member)
130 | ||
131 | ||
def cmdfiles_for_modorder(modorder):
    """Generate the iterator of .cmd files associated with the modules.order.

    Parse the given modules.order, and yield every .cmd file used to build the
    contained modules.

    Args:
        modorder: The modules.order file to parse.

    Yields:
        The path to every .cmd file found.

    Raises:
        SystemExit: A line of modules.order does not end with '.ko'.
    """
    with open(modorder) as f:
        for line in f:
            ko = line.rstrip()
            base, ext = os.path.splitext(ko)
            if ext != '.ko':
                # Requires the module-level 'import sys'.
                sys.exit('{}: module path must end with .ko'.format(ko))
            mod = base + '.mod'
            # The first line of *.mod lists the objects that compose the
            # module.
            with open(mod) as m:
                for obj in m.readline().split():
                    yield to_cmdfile(obj)
155 | ||
156 | ||
def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands: a dict with the keys
        'directory', 'file' and 'command'.

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as '\#' or '$(pound)' (depending on the
    # kernel version). The compile_commands.json file is not interpreted
    # by Make, so this code replaces the escaped version with '#'.
    # The backslash is doubled because '\#' is not a valid escape sequence in
    # a regular string literal.
    prefix = command_prefix.replace('\\#', '#').replace('$(pound)', '#')

    # Use os.path.abspath() to normalize the path resolving '.' and '..' .
    # os.path.join() discards root_directory when file_path is absolute.
    abs_path = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }
189 | ||
190 | ||
def main():
    """Finds and parses .cmd files, then writes the compile command database.

    Every path from the command line is expanded to its .cmd files. The
    first line of each .cmd file is matched against _LINE_PATTERN, and each
    match becomes one entry of the JSON list written to the output file.

    Raises:
        SystemExit: A path is neither a directory, a .o file, a .a file nor
            a modules.order file.
    """
    log_level, directory, output, ar, paths = parse_arguments()

    level = getattr(logging, log_level)
    logging.basicConfig(format='%(levelname)s: %(message)s', level=level)

    line_matcher = re.compile(_LINE_PATTERN)

    compile_commands = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # Most of built-in objects are linked via archives (built-in.a or lib.a)
        # but some objects are linked to vmlinux directly.
        # Modules are listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.o'):
            cmdfiles = cmdfiles_for_o(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            # Requires the module-level 'import sys'.
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line is needed, so the file is closed before
            # the line is processed.
            with open(cmdfile, 'rt') as f:
                first_line = f.readline()

            result = line_matcher.match(first_line)
            if not result:
                continue

            try:
                entry = process_line(directory, result.group(1),
                                     result.group(2))
            except ValueError as err:
                # A missing source file is logged and skipped.
                logging.info('Could not add line from %s: %s',
                             cmdfile, err)
            else:
                compile_commands.append(entry)

    with open(output, 'wt') as f:
        json.dump(compile_commands, f, indent=2, sort_keys=True)
233 | ||
b3020464 TR |
234 | |
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()