]> git.proxmox.com Git - mirror_frr.git/blame - tools/symalyzer.py
tools: symalyzer
[mirror_frr.git] / tools / symalyzer.py
CommitLineData
ba7eb55e
DL
1#!/usr/bin/python3
2#
3# 2019 by David Lamparter, placed in public domain
4#
5# This tool generates a report of possibly unused symbols in the build. It's
6# particularly useful for libfrr to find bitrotting functions that aren't even
7# used anywhere anymore.
8#
9# Note that the tool can't distinguish between "a symbol is completely unused"
10# and "a symbol is used only in its file" since file-internal references are
11# invisible in nm output. However, the compiler will warn you if a static
12# symbol is unused.
13#
14# This tool is only tested on Linux, it probably needs `nm` from GNU binutils
15# (as opposed to BSD `nm`). Could use pyelftools instead but that's a lot of
16# extra work.
17#
18# This is a developer tool, please don't put it in any packages :)
19
20import sys, os, subprocess
21import re
22from collections import namedtuple
23
class MakeVars(object):
    '''
    makevars['FOO_CFLAGS'] gets you "FOO_CFLAGS" from Makefile

    Values are obtained by running `make` with `shvar-<NAME>` goals and are
    cached in self._data, so each variable is only queried once.
    '''
    def __init__(self):
        self._data = dict()

    @staticmethod
    def _parse_rows(text):
        '''
        turn make's output (one NAME='value' row per variable) into a dict.

        The first and last character of each value (the quoting) are
        dropped.  Blank rows are skipped so that empty output yields an
        empty dict instead of a ValueError.
        '''
        parsed = {}
        for row in text.strip().split('\n'):
            if not row.strip():
                continue
            k, v = row.split('=', 1)
            parsed[k] = v[1:-1]
        return parsed

    def getvars(self, varlist):
        '''
        get a batch list of variables from make.  faster than individual calls.

        The results are stored in the cache; nothing is returned.
        '''
        varlist = list(varlist)
        if not varlist:
            # without any shvar-* goal, make would build its default target
            return

        rdfd, wrfd = os.pipe()
        shvars = ['shvar-%s' % s for s in varlist]
        try:
            make = subprocess.Popen(['make', '-s', 'VARFD=%d' % wrfd] + shvars, pass_fds = [wrfd])
        except OSError:
            os.close(rdfd)
            raise
        finally:
            # our copy of the write end must go away, otherwise the read
            # below never sees EOF
            os.close(wrfd)

        with os.fdopen(rdfd, 'rb') as rdf:
            data = rdf.read()
        make.wait()

        self._data.update(self._parse_rows(data.decode('US-ASCII')))

    def __getitem__(self, k):
        '''
        return the value of variable k, asking make if it isn't cached yet.

        Raises KeyError if make did not report the variable.
        '''
        if k not in self._data:
            self.getvars([k])
        return self._data[k]

    def get(self, k, defval = None):
        '''
        like __getitem__, but returns defval if the variable is empty or
        was not reported by make.
        '''
        if k not in self._data:
            self.getvars([k])
        return self._data.get(k) or defval
67
SymRowBase = namedtuple('SymRow', [
    'target', 'object', 'name', 'address', 'klass',
    'typ', 'size', 'line', 'section', 'loc',
])
class SymRow(SymRowBase):
    '''
    one parsed row of `nm` output, tagged with the link target and the
    object file it came from

    Deliberately has no __slots__: extra attributes are attached to
    instances after construction.
    '''
    # path whose last component is libSOMETHING.so / libSOMETHING.la
    lib_re = re.compile(r'/lib[^/]+\.(so|la)$')

    def is_global(self):
        '''
        True for an uppercase class letter, or for u / v / w
        '''
        if self.klass.isupper():
            return True
        return self.klass in 'uvw'

    def scope(self):
        '''
        None ("global") when the target is a library as per lib_re,
        otherwise the target itself
        '''
        return None if self.lib_re.search(self.target) else self.target

    def is_export(self):
        '''
        FRR-specific list of symbols which are considered "externally used"

        e.g. hooks are by design APIs for external use, same for qobj_t_*
        frr_inet_ntop is here because it's used through an ELF alias to
        "inet_ntop()"
        '''
        always_exported = ('main', 'frr_inet_ntop', '_libfrr_version')
        exported_prefixes = ('_hook_', 'qobj_t_')
        return self.name in always_exported or self.name.startswith(exported_prefixes)
97
class Symbols(dict):
    '''
    dict of all symbols in all libs & executables

    Keys are symbol names, values are Symbols.Symbol objects.  Call load()
    once per link target, then evaluate() to fill in self.extsyms and
    self.report.
    '''

    # header line that nm (sysv format) prints before each file's symbols
    from_re = re.compile(r'^Symbols from (.*?):$')
    # libtool wrapper file (foo.lo / libfoo.la): group 1 is the directory
    # including the trailing slash, group 2 the name without extension
    lt_re = re.compile(r'^(.*/)([^/]+)\.l[oa]$')

    def __init__(self):
        super().__init__()

    class ReportSym(object):
        '''
        one finding about a symbol; subclasses only differ in their
        idshort / idlong / title attributes
        '''
        def __init__(self, sym):
            self.sym = sym
        def __repr__(self):
            return '<%-25s %-40s [%s]>' % (self.__class__.__name__ + ':', self.sym.name, self.sym.loc)
        def __lt__(self, other):
            # order findings by symbol name
            return self.sym.name < other.sym.name

    class ReportSymCouldBeStaticAlreadyLocal(ReportSym):
        idshort = 'Z'
        idlong = 'extrastatic'
        title = "symbol is local to library, but only used in its source file (make static?)"
    class ReportSymCouldBeStatic(ReportSym):
        idshort = 'S'
        idlong = 'static'
        title = "symbol is only used in its source file (make static?)"
    class ReportSymCouldBeLibLocal(ReportSym):
        idshort = 'L'
        idlong = 'liblocal'
        title = "symbol is only used inside of library"
    class ReportSymModuleAPI(ReportSym):
        idshort = 'A'
        idlong = 'api'
        title = "symbol (in executable) is referenced externally from a module"

    class Symbol(object):
        '''
        all definitions and references seen for one symbol name

        defs maps scope (None for libraries, else the target) to the list
        of defining rows; refs is the list of rows in section *UND*.
        '''
        def __init__(self, name):
            super().__init__()
            self.name = name
            self.defs = {}
            self.refs = []

        def process(self, row):
            '''
            file one nm row as either a reference or a definition
            '''
            scope = row.scope()
            if row.section == '*UND*':
                self.refs.append(row)
            else:
                self.defs.setdefault(scope, []).append(row)

        def evaluate(self, out):
            '''
            generate output report

            invoked after all object files have been read in, so it can look
            at inter-object-file relationships

            out is the Symbols instance; findings are appended to
            out.report[<object file>], names without any definition are
            added to out.extsyms.
            '''
            if not self.defs:
                out.extsyms.add(self.name)
                return

            for scopename, symdefs in self.defs.items():
                common_defs = [symdef for symdef in symdefs if symdef.section == '*COM*']
                proper_defs = [symdef for symdef in symdefs if symdef.section != '*COM*']

                if len(proper_defs) > 1:
                    print(self.name, ' DUPLICATE')
                    print('\tD: %s %s' % (scopename, '\n\t\t'.join([repr(s) for s in symdefs])))
                    for syms in self.refs:
                        print('\tR: %s' % (syms, ))
                    # note: this ends evaluation of the remaining scopes too
                    return

                if proper_defs:
                    primary_def = proper_defs[0]
                elif common_defs:
                    # "common" = global variables without initializer;
                    # they can occur in multiple .o files and the linker will
                    # merge them into one variable/storage location.
                    primary_def = common_defs[0]
                else:
                    # undefined symbol, e.g. libc
                    continue

                # defined in an executable, but pulled in from some other
                # target that is a libtool library
                if scopename is not None and any(
                        ref.target != primary_def.target and ref.target.endswith('.la')
                        for ref in self.refs):
                    outobj = out.report.setdefault(primary_def.object, [])
                    outobj.append(out.ReportSymModuleAPI(primary_def))

                if not self.refs:
                    if primary_def.is_export():
                        continue
                    outobj = out.report.setdefault(primary_def.object, [])
                    if primary_def.visible:
                        outobj.append(out.ReportSymCouldBeStatic(primary_def))
                    else:
                        outobj.append(out.ReportSymCouldBeStaticAlreadyLocal(primary_def))
                    continue

                if scopename is None and primary_def.visible:
                    # lib symbol; flag it if no reference comes from outside
                    if all(ref.target == primary_def.target for ref in self.refs):
                        outobj = out.report.setdefault(primary_def.object, [])
                        outobj.append(out.ReportSymCouldBeLibLocal(primary_def))

    def evaluate(self):
        '''
        (re)build self.extsyms (set of names without definition) and
        self.report (object file -> list of ReportSym) from loaded symbols
        '''
        self.extsyms = set()
        self.report = {}

        for sym in self.values():
            sym.evaluate(self)

    @classmethod
    def _libtool_object(cls, fn):
        '''
        map dir/foo.lo (or .la) to dir/.libs/foo.o; other names pass through
        '''
        # lt_re needs a directory part, so give bare names one
        m = cls.lt_re.match(fn if '/' in fn else './' + fn)
        if m is None:
            return fn
        return m.group(1) + '.libs/' + m.group(2) + '.o'

    @classmethod
    def _libtool_target(cls, fn):
        '''
        map a link target to the real binary libtool put into .libs/
        (dir/libfoo.la -> dir/.libs/libfoo.so, dir/prog -> dir/.libs/prog)
        '''
        if '/' not in fn:
            # target in the current directory; previously a ValueError
            fn = './' + fn
        m = cls.lt_re.match(fn)
        if m is None:
            a, b = fn.rsplit('/', 1)
            return '%s/.libs/%s' % (a, b)
        return m.group(1) + '.libs/' + m.group(2) + '.so'

    @staticmethod
    def _run_nm(args):
        '''
        run nm with the given arguments and return its stdout as text
        '''
        nm = subprocess.Popen(['nm'] + args, stdout = subprocess.PIPE)
        # ASCII is a subset of UTF-8; 'replace' keeps a non-ASCII source
        # path in the -l output from aborting the whole run
        return nm.communicate()[0].decode('utf-8', 'replace')

    def load(self, target, files):
        '''
        read the symbols of one link target into this dict

        target is the automake target (e.g. lib/libfrr.la), files the
        list of its object files (libtool .lo names are translated).
        '''
        # sorted: keeps the choice of primary definition and the report
        # order identical between runs
        files = sorted(set(self._libtool_object(fn) for fn in files))
        if not files:
            # nm without file arguments would go looking for a.out
            return

        def parse_nm_output(text):
            filename = None
            path_rel_to = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

            for line in text.split('\n'):
                if line.strip() == '':
                    continue
                m = self.from_re.match(line)
                if m is not None:
                    filename = m.group(1)
                    continue
                if line.startswith('Name'):
                    continue

                items = [i.strip() for i in line.split('|')]
                if len(items) != 7:
                    # not a "name|value|class|type|size|line|section" row
                    continue
                loc = None
                if '\t' in items[-1]:
                    # with -l, "<tab>file:line" trails the section column
                    items[-1], loc = items[-1].split('\t', 1)
                    fn, lno = loc.rsplit(':', 1)
                    fn = os.path.relpath(fn, path_rel_to)
                    loc = '%s:%s' % (fn, lno)

                items[1] = int(items[1] if items[1] != '' else '0', 16)
                items[4] = int(items[4] if items[4] != '' else '0', 16)
                items.append(loc)
                row = SymRow(target, filename, *items)

                if row.section == '.group' or row.name == '_GLOBAL_OFFSET_TABLE_':
                    continue
                if not row.is_global():
                    continue

                yield row

        # the actual symbol report uses output from the individual object files
        # (e.g. lib/.libs/foo.o), but we also read the linked binary (e.g.
        # lib/.libs/libfrr.so) to determine which symbols are actually visible
        # in the linked result (this covers ELF "hidden"/"internal" linkage)

        libfile = self._libtool_target(target)
        out = self._run_nm(['-l', '-g', '--defined-only', '-f', 'sysv', libfile])
        visible_syms = set(row.name for row in parse_nm_output(out))

        out = self._run_nm(['-l', '-f', 'sysv'] + files)
        for row in parse_nm_output(out):
            row.visible = row.name in visible_syms
            # look up first: setdefault() would build a Symbol per row
            sym = self.get(row.name)
            if sym is None:
                sym = self[row.name] = self.Symbol(row.name)
            sym.process(row)
286
287
def write_html_report(syms):
    '''
    render syms.report into symalyzer_report.html in the current directory

    Uses the symalyzer.html jinja2 template located next to this script;
    without jinja2 a note is printed and nothing is written.  Afterwards
    jquery-3.4.1.min.js is downloaded next to the report unless it is
    already there.  A failed download only produces a message on stderr,
    the HTML report itself is complete at that point.
    '''
    try:
        import jinja2
    except ImportError:
        sys.stderr.write('jinja2 could not be imported, not writing HTML report!\n')
        return

    self_path = os.path.dirname(os.path.abspath(__file__))
    jenv = jinja2.Environment(loader=jinja2.FileSystemLoader(self_path))
    template = jenv.get_template('symalyzer.html')

    # group object files by source directory, ignoring libtool's .libs/
    dirgroups = {}
    for fn, reports in syms.report.items():
        dirname = fn.replace('.libs/', '').rpartition('/')[0]
        dirgroups.setdefault(dirname, {})[fn] = reports

    klasses = {
        'T': 'code / plain old regular function (Text)',
        'D': 'global variable, read-write, with nonzero initializer (Data)',
        'B': 'global variable, read-write, with zero initializer (BSS)',
        'C': 'global variable, read-write, with zero initializer (Common)',
        'R': 'global variable, read-only (Rodata)',
    }

    # write to a temporary name and rename, so a partial report never
    # replaces a previous complete one
    with open('symalyzer_report.html.tmp', 'w') as fd:
        fd.write(template.render(dirgroups = dirgroups, klasses = klasses))
    os.rename('symalyzer_report.html.tmp', 'symalyzer_report.html')

    jquery = 'jquery-3.4.1.min.js'
    if os.path.exists(jquery):
        return

    url = 'https://code.jquery.com/jquery-3.4.1.min.js'
    failmsg = 'failed -- please download jquery-3.4.1.min.js and put it next to the HTML report\n'
    sys.stderr.write(
        'trying to grab a copy of jquery from %s\nif this fails, please get it manually (the HTML output is done.)\n' % (url))

    try:
        import requests
    except ImportError:
        sys.stderr.write(failmsg)
        return

    try:
        r = requests.get(url)
    except requests.RequestException:
        sys.stderr.write(failmsg)
        return

    if r.status_code != 200:
        sys.stderr.write(failmsg)
    else:
        # raw bytes: no round trip through a guessed/locale text encoding
        with open(jquery + '.tmp', 'wb') as fd:
            fd.write(r.content)
        # was renamed onto itself before, so the final file never appeared
        os.rename(jquery + '.tmp', jquery)
        sys.stderr.write('done.\n')
329
def automake_escape(s):
    '''
    canonicalize a target name the way automake does for derived variable
    names (e.g. "lib/libfrr.la" -> "lib_libfrr_la", as in lib_libfrr_la_LDADD)

    Every character other than ASCII letters, digits, "_" and "@" becomes
    an underscore -- not just "." and "/", so targets containing e.g. "-"
    or "+" map to the right variable as well.
    '''
    return re.sub(r'[^A-Za-z0-9_@]', '_', s)
332
if __name__ == '__main__':
    # Script entry point: collect every program/library target of the FRR
    # build in the current directory, feed their object files to Symbols,
    # print the findings per object file and write the HTML report.
    makevars = MakeVars()

    in_build_tree = os.path.exists('config.version') and os.path.exists('lib/.libs/libfrr.so')
    if not in_build_tree:
        sys.stderr.write('please execute this script in the root directory of an FRR build tree\n')
        sys.stderr.write('./configure && make need to have completed successfully\n')
        sys.exit(1)

    def ldadd_var(name):
        # make variable holding what a target is linked against
        return '%s_LDADD' % automake_escape(name)

    def objects_var(name):
        # make variable holding the object files of a target / archive
        return '%s_OBJECTS' % automake_escape(name)

    def static_archives(name):
        # the .a entries of a target's LDADD; linker options are ignored
        return [entry for entry in makevars[ldadd_var(name)].split()
                if not entry.startswith('-') and entry.endswith('.a')]

    target_vars = ['bin_PROGRAMS', 'sbin_PROGRAMS', 'lib_LTLIBRARIES', 'module_LTLIBRARIES']

    # each getvars() call below is a single make invocation for the batch
    makevars.getvars(target_vars)
    all_targets = []
    for varname in target_vars:
        for entry in makevars[varname].split():
            # tools/ssd is left out of the analysis
            if entry != 'tools/ssd':
                all_targets.append(entry)

    makevars.getvars([ldadd_var(tgt) for tgt in all_targets])
    with_archives = list(all_targets)
    for tgt in all_targets:
        with_archives.extend(static_archives(tgt))

    makevars.getvars([objects_var(name) for name in with_archives])

    symtab = Symbols()

    for tgt in all_targets:
        # objects of the target itself plus those of its static archives
        objfiles = makevars[objects_var(tgt)].split()
        for archive in static_archives(tgt):
            objfiles.extend(makevars[objects_var(archive)].split())

        sys.stderr.write('processing %s...\n' % tgt)
        sys.stderr.flush()
        symtab.load(tgt, objfiles)

    symtab.evaluate()

    for objname in sorted(symtab.report):
        print('%s:' % objname)
        for report in symtab.report[objname]:
            print('\t%r' % report)

    write_html_report(symtab)