]>
Commit | Line | Data |
---|---|---|
ba7eb55e DL |
1 | #!/usr/bin/python3 |
2 | # | |
3 | # 2019 by David Lamparter, placed in public domain | |
4 | # | |
5 | # This tool generates a report of possibly unused symbols in the build. It's | |
6 | # particularly useful for libfrr to find bitrotting functions that aren't even | |
7 | # used anywhere anymore. | |
8 | # | |
9 | # Note that the tool can't distinguish between "a symbol is completely unused" | |
10 | # and "a symbol is used only in its file" since file-internal references are | |
11 | # invisible in nm output. However, the compiler will warn you if a static | |
12 | # symbol is unused. | |
13 | # | |
14 | # This tool is only tested on Linux, it probably needs `nm` from GNU binutils | |
15 | # (as opposed to BSD `nm`). Could use pyelftools instead but that's a lot of | |
16 | # extra work. | |
17 | # | |
18 | # This is a developer tool, please don't put it in any packages :) | |
19 | ||
20 | import sys, os, subprocess | |
21 | import re | |
22 | from collections import namedtuple | |
23 | ||
class MakeVars(object):
    '''
    makevars['FOO_CFLAGS'] gets you "FOO_CFLAGS" from Makefile

    Values are fetched by running `make` and are cached in self._data, so
    each variable is only queried once.  Use getvars() to prefetch several
    variables with a single `make` invocation.
    '''
    def __init__(self):
        # variable name -> value as reported by make
        self._data = dict()

    def getvars(self, varlist):
        '''
        get a batch list of variables from make.  faster than individual calls.

        Runs `make -s VARFD=<fd> shvar-<NAME>...` and reads the result back
        through a pipe whose write end is passed to make as file descriptor
        <fd>.  Each output row is expected to look like NAME=<c>value<c>;
        the first and last character of the value part are dropped
        (presumably surrounding quotes written by a "shvar-%" rule in the
        build tree's Makefile -- that rule is not part of this file).

        An empty varlist is a no-op: calling make without any goal would
        run the default target instead of querying variables.
        '''
        varlist = list(varlist)
        if not varlist:
            return

        rdfd, wrfd = os.pipe()
        shvars = ['shvar-%s' % s for s in varlist]
        try:
            make = subprocess.Popen(['make', '-s', 'VARFD=%d' % wrfd] + shvars,
                                    pass_fds=[wrfd])
        except Exception:
            # make could not be started; don't leak the read end
            os.close(rdfd)
            raise
        finally:
            # our copy of the write end must be closed in any case, otherwise
            # the read below would never see EOF
            os.close(wrfd)

        # read() without a size returns everything up to EOF
        with os.fdopen(rdfd, 'rb') as rdf:
            data = rdf.read()
        make.wait()

        for row in data.decode('US-ASCII').strip().split('\n'):
            if '=' not in row:
                # empty output (e.g. make failed) or stray line
                continue
            k, v = row.split('=', 1)
            self._data[k] = v[1:-1]

    def __getitem__(self, k):
        '''
        return the value of make variable k, querying make on a cache miss.

        Raises KeyError if make did not report the variable.
        '''
        if k not in self._data:
            self.getvars([k])
        return self._data[k]

    def get(self, k, defval = None):
        '''
        like self[k], but return defval if the variable's value is empty.
        '''
        if k not in self._data:
            self.getvars([k])
        return self._data[k] or defval
67 | ||
SymRowBase = namedtuple('SymRow', [
    'target',
    'object',
    'name',
    'address',
    'klass',
    'typ',
    'size',
    'line',
    'section',
    'loc',
])
class SymRow(SymRowBase):
    '''
    one parsed line of `nm` output, plus the build target and object file
    it was read for

    Deliberately defines no __slots__: instances receive extra attributes
    after construction.
    '''
    # matches targets that are libraries (".../libfoo.so" or ".../libfoo.la")
    lib_re = re.compile(r'/lib[^/]+\.(so|la)$')

    def is_global(self):
        '''
        true for an upper-case symbol class letter, or one found in "uvw"
        '''
        if self.klass.isupper():
            return True
        return self.klass in 'uvw'

    def scope(self):
        '''
        return the target this symbol is scoped to, or None ("global")
        when the target is a library
        '''
        in_library = self.lib_re.search(self.target) is not None
        return None if in_library else self.target

    def is_export(self):
        '''
        FRR-specific list of symbols which are considered "externally used"

        e.g. hooks are by design APIs for external use, same for qobj_t_*
        frr_inet_ntop is here because it's used through an ELF alias to
        "inet_ntop()"
        '''
        always_exported = ('main', 'frr_inet_ntop', '_libfrr_version')
        if self.name in always_exported:
            return True
        return self.name.startswith(('_hook_', 'qobj_t_'))
97 | ||
class Symbols(dict):
    '''
    dict of all symbols in all libs & executables

    Maps symbol name -> Symbols.Symbol.  Fill it with load() (once per
    build target), then call evaluate(), which sets:

    - self.extsyms: set of symbol names that are referenced but not
      defined in any loaded object
    - self.report: dict of object file name -> list of ReportSym findings
    '''

    # header line nm prints before the symbols of each input file
    from_re = re.compile(r'^Symbols from (.*?):$')
    # libtool wrapper names: "<dir>/<name>.lo" or "<dir>/<name>.la"
    lt_re = re.compile(r'^(.*/)([^/]+)\.l[oa]$')

    def __init__(self):
        super().__init__()

    class ReportSym(object):
        '''
        base class for one finding about a symbol definition; subclasses
        carry idshort / idlong / title describing the kind of finding
        '''
        def __init__(self, sym):
            self.sym = sym
        def __repr__(self):
            return '<%-25s %-40s [%s]>' % (self.__class__.__name__ + ':', self.sym.name, self.sym.loc)
        def __lt__(self, other):
            # findings sort by symbol name
            return self.sym.name < other.sym.name

    class ReportSymCouldBeStaticAlreadyLocal(ReportSym):
        idshort = 'Z'
        idlong = 'extrastatic'
        title = "symbol is local to library, but only used in its source file (make static?)"
    class ReportSymCouldBeStatic(ReportSym):
        idshort = 'S'
        idlong = 'static'
        title = "symbol is only used in its source file (make static?)"
    class ReportSymCouldBeLibLocal(ReportSym):
        idshort = 'L'
        idlong = 'liblocal'
        title = "symbol is only used inside of library"
    class ReportSymModuleAPI(ReportSym):
        idshort = 'A'
        idlong = 'api'
        title = "symbol (in executable) is referenced externally from a module"

    class Symbol(object):
        '''
        all definitions of and references to one symbol name

        self.defs maps scope (row.scope(): a target name, or None for
        library symbols) to the list of defining rows; self.refs lists the
        rows that reference the symbol without defining it.
        '''
        def __init__(self, name):
            super().__init__()
            self.name = name
            self.defs = {}
            self.refs = []

        def process(self, row):
            '''
            file one nm row as a reference (section "*UND*") or definition
            '''
            scope = row.scope()
            if row.section == '*UND*':
                self.refs.append(row)
            else:
                self.defs.setdefault(scope, []).append(row)

        def evaluate(self, out):
            '''
            generate output report

            invoked after all object files have been read in, so it can look
            at inter-object-file relationships

            out is the Symbols container; findings are appended to
            out.report[<object file>] and symbols without any definition
            are added to out.extsyms.
            '''
            if len(self.defs) == 0:
                out.extsyms.add(self.name)
                return

            for scopename, symdefs in self.defs.items():
                common_defs = [symdef for symdef in symdefs if symdef.section == '*COM*']
                proper_defs = [symdef for symdef in symdefs if symdef.section != '*COM*']

                if len(proper_defs) > 1:
                    print(self.name, ' DUPLICATE')
                    print('\tD: %s %s' % (scopename, '\n\t\t'.join([repr(s) for s in symdefs])))
                    for syms in self.refs:
                        print('\tR: %s' % (syms, ))
                    # NOTE(review): this stops evaluating the symbol
                    # altogether, including any scopes not visited yet --
                    # confirm that this is intended rather than "continue"
                    return

                if len(proper_defs):
                    primary_def = proper_defs[0]
                elif len(common_defs):
                    # "common" = global variables without initializer;
                    # they can occur in multiple .o files and the linker will
                    # merge them into one variable/storage location.
                    primary_def = common_defs[0]
                else:
                    # undefined symbol, e.g. libc
                    continue

                if scopename is not None and len(self.refs) > 0:
                    # defined in a non-library target; flag it once if any
                    # reference comes from a different target that is a .la
                    for ref in self.refs:
                        if ref.target != primary_def.target and ref.target.endswith('.la'):
                            outobj = out.report.setdefault(primary_def.object, [])
                            outobj.append(out.ReportSymModuleAPI(primary_def))
                            break

                if len(self.refs) == 0:
                    # nothing references the symbol from another object file
                    if primary_def.is_export():
                        continue
                    outobj = out.report.setdefault(primary_def.object, [])
                    if primary_def.visible:
                        outobj.append(out.ReportSymCouldBeStatic(primary_def))
                    else:
                        outobj.append(out.ReportSymCouldBeStaticAlreadyLocal(primary_def))
                    continue

                if scopename is None and primary_def.visible:
                    # lib symbol
                    for ref in self.refs:
                        if ref.target != primary_def.target:
                            break
                    else:
                        # every reference comes from the defining library
                        outobj = out.report.setdefault(primary_def.object, [])
                        outobj.append(out.ReportSymCouldBeLibLocal(primary_def))


    def evaluate(self):
        '''
        (re)build self.extsyms and self.report from all loaded symbols
        '''
        self.extsyms = set()
        self.report = {}

        for sym in self.values():
            sym.evaluate(self)

    @classmethod
    def _libtool_object_path(cls, fn):
        '''
        map a libtool name "<dir>/<name>.lo" (or .la) to the real object
        "<dir>/.libs/<name>.o"; any other name is returned unchanged
        '''
        m = cls.lt_re.match(fn)
        if m is None:
            return fn
        return m.group(1) + '.libs/' + m.group(2) + '.o'

    @classmethod
    def _libtool_target_path(cls, fn):
        '''
        map a build target to the linked binary below ".libs/":
        "<dir>/<name>.la" -> "<dir>/.libs/<name>.so", anything else
        "<dir>/<name>" -> "<dir>/.libs/<name>"

        A target without a directory part maps to ".libs/<name>".
        '''
        m = cls.lt_re.match(fn)
        if m is None:
            head, sep, tail = fn.rpartition('/')
            return '%s%s.libs/%s' % (head, sep, tail)
        return m.group(1) + '.libs/' + m.group(2) + '.so'

    def load(self, target, files):
        '''
        read the symbols of one build target into this dict

        target is the build target's name (e.g. lib/libfrr.la), files are
        the object files it is linked from.  Runs `nm -f sysv` on the
        object files and on the linked binary; every global symbol row of
        the object files is recorded on its Symbol entry.
        '''
        # de-duplicate; sorted so nm input (and thereby report) order is
        # the same on every run
        files = sorted(set(self._libtool_object_path(fn) for fn in files))

        def parse_nm_output(text):
            '''
            yield a SymRow for each global symbol in `nm -l -f sysv` output
            '''
            filename = None
            # source locations are reported relative to the parent of the
            # directory this script lives in
            path_rel_to = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

            for line in text.split('\n'):
                if line.strip() == '':
                    continue
                m = self.from_re.match(line)
                if m is not None:
                    filename = m.group(1)
                    continue
                if line.startswith('Name'):
                    # column header
                    continue

                items = [i.strip() for i in line.split('|')]
                loc = None
                if '\t' in items[-1]:
                    # the last column is followed by <TAB>file:line
                    items[-1], loc = items[-1].split('\t', 1)
                    fn, lno = loc.rsplit(':', 1)
                    fn = os.path.relpath(fn, path_rel_to)
                    loc = '%s:%s' % (fn, lno)

                # two of the columns are hex numbers and may be empty
                items[1] = int(items[1] or '0', 16)
                items[4] = int(items[4] or '0', 16)
                items.append(loc)
                row = SymRow(target, filename, *items)

                if row.section == '.group' or row.name == '_GLOBAL_OFFSET_TABLE_':
                    continue
                if not row.is_global():
                    continue

                yield row

        visible_syms = set()

        # the actual symbol report uses output from the individual object files
        # (e.g. lib/.libs/foo.o), but we also read the linked binary (e.g.
        # lib/.libs/libfrr.so) to determine which symbols are actually visible
        # in the linked result (this covers ELF "hidden"/"internal" linkage)

        libfile = self._libtool_target_path(target)
        nmlib = subprocess.Popen(['nm', '-l', '-g', '--defined-only', '-f', 'sysv', libfile], stdout = subprocess.PIPE)
        out = nmlib.communicate()[0].decode('US-ASCII')

        for row in parse_nm_output(out):
            visible_syms.add(row.name)

        if not files:
            # without file arguments nm would fall back to "a.out"
            return

        nm = subprocess.Popen(['nm', '-l', '-f', 'sysv'] + files, stdout = subprocess.PIPE)
        out = nm.communicate()[0].decode('US-ASCII')

        for row in parse_nm_output(out):
            row.visible = row.name in visible_syms
            sym = self.get(row.name)
            if sym is None:
                # only create a Symbol for names not seen before
                sym = self[row.name] = self.Symbol(row.name)
            sym.process(row)
286 | ||
287 | ||
def write_html_report(syms):
    '''
    render syms.report into symalyzer_report.html in the current directory

    Uses the jinja2 template "symalyzer.html" located next to this script;
    if jinja2 cannot be imported, a message is printed and nothing is
    written.  Afterwards, unless jquery-3.4.1.min.js already exists in the
    current directory, tries to download it.  The download is best-effort:
    any failure only prints a hint to stderr, since the HTML report itself
    has been written at that point.
    '''
    try:
        import jinja2
    except ImportError:
        sys.stderr.write('jinja2 could not be imported, not writing HTML report!\n')
        return

    self_path = os.path.dirname(os.path.abspath(__file__))
    jenv = jinja2.Environment(loader=jinja2.FileSystemLoader(self_path))
    template = jenv.get_template('symalyzer.html')

    # group the per-object reports by directory, ignoring the libtool
    # ".libs/" path component
    dirgroups = {}
    for fn, reports in syms.report.items():
        dirname = fn.replace('.libs/', '').rpartition('/')[0]
        dirgroups.setdefault(dirname, {})[fn] = reports

    # human-readable descriptions of nm symbol class letters
    klasses = {
        'T': 'code / plain old regular function (Text)',
        'D': 'global variable, read-write, with nonzero initializer (Data)',
        'B': 'global variable, read-write, with zero initializer (BSS)',
        'C': 'global variable, read-write, with zero initializer (Common)',
        'R': 'global variable, read-only (Rodata)',
    }

    # write to a temporary name and rename, so a partial report never
    # shows up under the final name
    with open('symalyzer_report.html.tmp', 'w') as fd:
        fd.write(template.render(dirgroups = dirgroups, klasses = klasses))
    os.rename('symalyzer_report.html.tmp', 'symalyzer_report.html')

    if os.path.exists('jquery-3.4.1.min.js'):
        return

    url = 'https://code.jquery.com/jquery-3.4.1.min.js'
    failmsg = 'failed -- please download jquery-3.4.1.min.js and put it next to the HTML report\n'
    sys.stderr.write(
        'trying to grab a copy of jquery from %s\nif this fails, please get it manually (the HTML output is done.)\n' % (url))

    try:
        import requests
    except ImportError:
        sys.stderr.write(failmsg)
        return

    try:
        r = requests.get(url, timeout = 30)
    except requests.exceptions.RequestException:
        sys.stderr.write(failmsg)
        return

    if r.status_code != 200:
        sys.stderr.write(failmsg)
    else:
        # store the payload byte-for-byte, independent of the locale encoding
        with open('jquery-3.4.1.min.js.tmp', 'wb') as fd:
            fd.write(r.content)
        os.rename('jquery-3.4.1.min.js.tmp', 'jquery-3.4.1.min.js')
        sys.stderr.write('done.\n')
329 | ||
def automake_escape(s):
    '''
    canonicalize a target name the way automake derives variable names

    Every character other than ASCII letters, digits, "@" and "_" becomes
    an underscore, e.g. "lib/libfrr.la" -> "lib_libfrr_la" (so its objects
    are found in "lib_libfrr_la_OBJECTS").
    '''
    return re.sub(r'[^A-Za-z0-9@_]', '_', s)
332 | ||
if __name__ == '__main__':
    # refuse to run anywhere but the top of a completed build tree
    build_markers = ('config.version', 'lib/.libs/libfrr.so')
    if not all(os.path.exists(marker) for marker in build_markers):
        sys.stderr.write('please execute this script in the root directory of an FRR build tree\n')
        sys.stderr.write('./configure && make need to have completed successfully\n')
        sys.exit(1)

    makevars = MakeVars()

    # collect all programs and libtool libraries, except tools/ssd
    primaries = ['bin_PROGRAMS', 'sbin_PROGRAMS', 'lib_LTLIBRARIES', 'module_LTLIBRARIES']
    makevars.getvars(primaries)
    targets = []
    for primary in primaries:
        for word in makevars[primary].split():
            if word != 'tools/ssd':
                targets.append(word)

    def static_archives(tgt):
        '''
        the ".a" entries of <tgt>_LDADD (words starting with "-" excluded)
        '''
        words = makevars['%s_LDADD' % automake_escape(tgt)].split()
        return [w for w in words if not w.startswith('-') and w.endswith('.a')]

    # prefetch in batches: first every target's LDADD, then the OBJECTS of
    # every target and of every static archive pulled in through LDADD
    makevars.getvars(['%s_LDADD' % automake_escape(tgt) for tgt in targets])
    linkables = list(targets)
    for tgt in targets:
        linkables.extend(static_archives(tgt))
    makevars.getvars(['%s_OBJECTS' % automake_escape(name) for name in linkables])

    syms = Symbols()

    for tgt in targets:
        # a target's own objects plus those of its static archives
        objs = makevars['%s_OBJECTS' % automake_escape(tgt)].split()
        for archive in static_archives(tgt):
            objs.extend(makevars['%s_OBJECTS' % automake_escape(archive)].split())

        sys.stderr.write('processing %s...\n' % tgt)
        sys.stderr.flush()
        syms.load(tgt, objs)

    syms.evaluate()

    # plain-text report on stdout, one section per object file
    for obj, reports in sorted(syms.report.items()):
        print('%s:' % obj)
        for report in reports:
            print('\t%r' % report)

    write_html_report(syms)