]>
Commit | Line | Data |
---|---|---|
ba7eb55e DL |
1 | #!/usr/bin/python3 |
2 | # | |
3 | # 2019 by David Lamparter, placed in public domain | |
4 | # | |
5 | # This tool generates a report of possibly unused symbols in the build. It's | |
6 | # particularly useful for libfrr to find bitrotting functions that aren't even | |
7 | # used anywhere anymore. | |
8 | # | |
9 | # Note that the tool can't distinguish between "a symbol is completely unused" | |
10 | # and "a symbol is used only in its file" since file-internal references are | |
11 | # invisible in nm output. However, the compiler will warn you if a static | |
12 | # symbol is unused. | |
13 | # | |
14 | # This tool is only tested on Linux, it probably needs `nm` from GNU binutils | |
15 | # (as opposed to BSD `nm`). Could use pyelftools instead but that's a lot of | |
16 | # extra work. | |
17 | # | |
18 | # This is a developer tool, please don't put it in any packages :) | |
19 | ||
20 | import sys, os, subprocess | |
21 | import re | |
22 | from collections import namedtuple | |
23 | ||
701a0192 | 24 | sys.path.insert( |
25 | 0, | |
26 | os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "python"), | |
27 | ) | |
ba7eb55e | 28 | |
879a9dc5 | 29 | from makevars import MakeVars |
ba7eb55e | 30 | |
SymRowBase = namedtuple(
    "SymRow",
    [
        "target",
        "object",
        "name",
        "address",
        "klass",
        "typ",
        "size",
        "line",
        "section",
        "loc",
    ],
)


class SymRow(SymRowBase):
    """
    wrapper around a line of `nm` output

    `target` is the build target the row was read for and `object` the file
    nm reported it from; the remaining fields are the columns of one line of
    `nm -f sysv` output plus the optional "file:line" location.

    This class deliberately has no __slots__: Symbols.load() attaches an
    extra `visible` attribute to each row after construction.
    """

    # a target counts as a library when its last path component is
    # lib<something>.so or lib<something>.la
    lib_re = re.compile(r"/lib[^/]+\.(so|la)$")

    # lowercase nm class letters that are still treated as global symbols
    global_lowercase = ("u", "v", "w")

    def is_global(self):
        """
        True if the nm class letter denotes a global symbol

        Uppercase letters are global; lowercase ones are local, except for
        the letters listed in `global_lowercase`.  The comparison is an
        exact match, so an empty class string is not treated as global.
        """
        return self.klass.isupper() or self.klass in self.global_lowercase

    def scope(self):
        """
        namespace this symbol's definition lives in

        Returns the target name for anything that is not a library, and
        None (meaning "global") for symbols coming from a library target.
        """
        if self.lib_re.search(self.target) is None:
            return self.target
        # "global"
        return None

    def is_export(self):
        """
        FRR-specific list of symbols which are considered "externally used"

        e.g. hooks are by design APIs for external use, same for qobj_t_*
        frr_inet_ntop is here because it's used through an ELF alias to
        "inet_ntop()"
        """
        if self.name in ("main", "frr_inet_ntop", "_libfrr_version"):
            return True
        return self.name.startswith(("_hook_", "qobj_t_"))
79 | ||
701a0192 | 80 | |
class Symbols(dict):
    """
    dict of all symbols in all libs & executables

    Keys are symbol names, values are Symbols.Symbol objects.  Call load()
    once per build target, then evaluate() to fill in `extsyms` (names that
    are referenced but never defined in the loaded objects) and `report`
    (object file name -> list of ReportSym findings).
    """

    # header line nm prints in front of each object file's symbols
    from_re = re.compile(r"^Symbols from (.*?):$")
    # libtool wrapper names (foo.lo / libfoo.la) with a directory part
    lt_re = re.compile(r"^(.*/)([^/]+)\.l[oa]$")

    def __init__(self):
        super().__init__()

    class ReportSym(object):
        """
        base class for one finding about one symbol definition

        Subclasses only supply idshort / idlong / title.
        """

        def __init__(self, sym):
            self.sym = sym

        def __repr__(self):
            return "<%-25s %-40s [%s]>" % (
                self.__class__.__name__ + ":",
                self.sym.name,
                self.sym.loc,
            )

        def __lt__(self, other):
            # findings order by symbol name
            return self.sym.name < other.sym.name

    class ReportSymCouldBeStaticAlreadyLocal(ReportSym):
        idshort = "Z"
        idlong = "extrastatic"
        title = "symbol is local to library, but only used in its source file (make static?)"

    class ReportSymCouldBeStatic(ReportSym):
        idshort = "S"
        idlong = "static"
        title = "symbol is only used in its source file (make static?)"

    class ReportSymCouldBeLibLocal(ReportSym):
        idshort = "L"
        idlong = "liblocal"
        title = "symbol is only used inside of library"

    class ReportSymModuleAPI(ReportSym):
        idshort = "A"
        idlong = "api"
        title = "symbol (in executable) is referenced externally from a module"

    class Symbol(object):
        """
        all definitions of and references to one symbol name

        `defs` maps scope (see SymRow.scope()) -> list of defining rows,
        `refs` is the flat list of rows that reference the symbol.
        """

        def __init__(self, name):
            super().__init__()
            self.name = name
            self.defs = {}
            self.refs = []

        def process(self, row):
            """
            file one nm row as either a reference or a definition
            """
            scope = row.scope()
            if row.section == "*UND*":
                self.refs.append(row)
            else:
                self.defs.setdefault(scope, []).append(row)

        def evaluate(self, out):
            """
            generate output report

            invoked after all object files have been read in, so it can look
            at inter-object-file relationships

            `out` is the owning Symbols object; findings are appended to
            out.report and undefined names are added to out.extsyms.
            """
            if len(self.defs) == 0:
                out.extsyms.add(self.name)
                return

            for scopename, symdefs in self.defs.items():
                common_defs = [
                    symdef for symdef in symdefs if symdef.section == "*COM*"
                ]
                proper_defs = [
                    symdef for symdef in symdefs if symdef.section != "*COM*"
                ]

                if len(proper_defs) > 1:
                    print(self.name, " DUPLICATE")
                    print(
                        "\tD: %s %s"
                        % (scopename, "\n\t\t".join([repr(s) for s in symdefs]))
                    )
                    for syms in self.refs:
                        print("\tR: %s" % (syms,))
                    # NOTE(review): this abandons the remaining scopes of
                    # this symbol as well -- kept as-is, confirm intent
                    return

                if len(proper_defs):
                    primary_def = proper_defs[0]
                elif len(common_defs):
                    # "common" = global variables without initializer;
                    # they can occur in multiple .o files and the linker will
                    # merge them into one variable/storage location.
                    primary_def = common_defs[0]
                else:
                    # undefined symbol, e.g. libc
                    continue

                # defined in a non-library target but referenced from some
                # other target that is a libtool .la => module API
                if scopename is not None and any(
                    ref.target != primary_def.target and ref.target.endswith(".la")
                    for ref in self.refs
                ):
                    outobj = out.report.setdefault(primary_def.object, [])
                    outobj.append(out.ReportSymModuleAPI(primary_def))

                if len(self.refs) == 0:
                    if primary_def.is_export():
                        continue
                    outobj = out.report.setdefault(primary_def.object, [])
                    if primary_def.visible:
                        outobj.append(out.ReportSymCouldBeStatic(primary_def))
                    else:
                        outobj.append(
                            out.ReportSymCouldBeStaticAlreadyLocal(primary_def)
                        )
                    continue

                if scopename is None and primary_def.visible:
                    # lib symbol; only report it if every reference comes
                    # from the defining target itself
                    if all(ref.target == primary_def.target for ref in self.refs):
                        outobj = out.report.setdefault(primary_def.object, [])
                        outobj.append(out.ReportSymCouldBeLibLocal(primary_def))

    def evaluate(self):
        """
        (re)build `extsyms` and `report` from everything loaded so far
        """
        self.extsyms = set()
        self.report = {}

        for sym in self.values():
            sym.evaluate(self)

    @classmethod
    def _libtool_object(cls, fn):
        """map dir/foo.lo (or .la) to dir/.libs/foo.o; other names pass through"""
        m = cls.lt_re.match(fn)
        if m is None:
            return fn
        return m.group(1) + ".libs/" + m.group(2) + ".o"

    @classmethod
    def _libtool_target(cls, fn):
        """map a build target to the linked file below its .libs/ directory"""
        m = cls.lt_re.match(fn)
        if m is not None:
            return m.group(1) + ".libs/" + m.group(2) + ".so"
        dirname, sep, basename = fn.rpartition("/")
        if not sep:
            # target without a directory part
            return ".libs/" + basename
        return "%s/.libs/%s" % (dirname, basename)

    @staticmethod
    def _run_nm(args):
        """
        run `nm` with the given arguments and return its stdout as text

        The exit status is not checked, so a file nm cannot read simply
        contributes no rows.  Bytes outside US-ASCII are replaced rather
        than aborting the whole run.
        """
        proc = subprocess.Popen(["nm"] + args, stdout=subprocess.PIPE)
        return proc.communicate()[0].decode("US-ASCII", errors="replace")

    def _parse_nm_output(self, target, text):
        """
        yield a SymRow for each global symbol line in `nm -l -f sysv` output
        """
        filename = None
        path_rel_to = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        for line in text.split("\n"):
            if line.strip() == "":
                continue
            m = self.from_re.match(line)
            if m is not None:
                # header naming the file the following rows belong to
                filename = m.group(1)
                continue
            if line.startswith("Name"):
                # column title row
                continue

            items = [i.strip() for i in line.split("|")]
            loc = None
            if "\t" in items[-1]:
                # a tab in the last column separates it from "file:line"
                items[-1], loc = items[-1].split("\t", 1)
                fn, lno = loc.rsplit(":", 1)
                fn = os.path.relpath(fn, path_rel_to)
                loc = "%s:%s" % (fn, lno)

            # address and size columns are hex, and may be blank
            items[1] = int(items[1] if items[1] != "" else "0", 16)
            items[4] = int(items[4] if items[4] != "" else "0", 16)
            items.append(loc)
            row = SymRow(target, filename, *items)

            if row.section == ".group" or row.name == "_GLOBAL_OFFSET_TABLE_":
                continue
            if not row.is_global():
                continue

            yield row

    def load(self, target, files):
        """
        read the symbols of one build target into this dict

        `target` is the target name as listed in the automake variables,
        `files` its object files (libtool .lo/.la names are mapped to the
        real files below .libs/).
        """
        # de-duplicate, and sort so that report order is reproducible
        objfiles = sorted({self._libtool_object(fn) for fn in files})
        if not objfiles:
            # nm without file arguments would silently inspect ./a.out
            return

        # the actual symbol report uses output from the individual object files
        # (e.g. lib/.libs/foo.o), but we also read the linked binary (e.g.
        # lib/.libs/libfrr.so) to determine which symbols are actually visible
        # in the linked result (this covers ELF "hidden"/"internal" linkage)

        libfile = self._libtool_target(target)
        out = self._run_nm(["-l", "-g", "--defined-only", "-f", "sysv", libfile])
        visible_syms = {row.name for row in self._parse_nm_output(target, out)}

        out = self._run_nm(["-l", "-f", "sysv"] + objfiles)
        for row in self._parse_nm_output(target, out):
            row.visible = row.name in visible_syms
            sym = self.get(row.name)
            if sym is None:
                sym = self[row.name] = self.Symbol(row.name)
            sym.process(row)
293 | ||
294 | ||
def write_html_report(syms):
    """
    render syms.report into symalyzer_report.html in the current directory

    Needs jinja2 and the symalyzer.html template next to this script; if
    jinja2 is not installed a note is printed and nothing is written.

    Afterwards, if jquery-3.4.1.min.js is not present in the current
    directory, a download is attempted.  That step is best-effort: a missing
    `requests` module, a network error or a non-200 response only produce a
    message on stderr, since the HTML report is already complete by then.
    """
    try:
        import jinja2
    except ImportError:
        sys.stderr.write("jinja2 could not be imported, not writing HTML report!\n")
        return

    self_path = os.path.dirname(os.path.abspath(__file__))
    jenv = jinja2.Environment(loader=jinja2.FileSystemLoader(self_path))
    template = jenv.get_template("symalyzer.html")

    # group object files by source directory (the libtool .libs/ component
    # is dropped), keeping the full object name as the inner key
    dirgroups = {}
    for fn, reports in syms.report.items():
        dirname = fn.replace(".libs/", "").rpartition("/")[0]
        dirgroups.setdefault(dirname, {})[fn] = reports

    klasses = {
        "T": "code / plain old regular function (Text)",
        "D": "global variable, read-write, with nonzero initializer (Data)",
        "B": "global variable, read-write, with zero initializer (BSS)",
        "C": "global variable, read-write, with zero initializer (Common)",
        "R": "global variable, read-only (Rodata)",
    }

    # write to a temporary name and rename, so a partial report never
    # replaces an existing one
    with open("symalyzer_report.html.tmp", "w") as fd:
        fd.write(template.render(dirgroups=dirgroups, klasses=klasses))
    os.rename("symalyzer_report.html.tmp", "symalyzer_report.html")

    if os.path.exists("jquery-3.4.1.min.js"):
        return

    url = "https://code.jquery.com/jquery-3.4.1.min.js"
    failed = "failed -- please download jquery-3.4.1.min.js and put it next to the HTML report\n"
    sys.stderr.write(
        "trying to grab a copy of jquery from %s\nif this fails, please get it manually (the HTML output is done.)\n"
        % (url,)
    )

    try:
        import requests
    except ImportError:
        sys.stderr.write(failed)
        return

    try:
        r = requests.get(url, timeout=30)
    except requests.RequestException as err:
        sys.stderr.write("%s\n" % (err,))
        sys.stderr.write(failed)
        return

    if r.status_code != 200:
        sys.stderr.write(failed)
        return

    # store the bytes exactly as received instead of round-tripping them
    # through a guessed text encoding
    with open("jquery-3.4.1.min.js.tmp", "wb") as fd:
        fd.write(r.content)
    os.rename("jquery-3.4.1.min.js.tmp", "jquery-3.4.1.min.js")
    sys.stderr.write("done.\n")
341 | ||
ba7eb55e DL |
342 | |
def automake_escape(s):
    """
    canonicalize a target name the way automake does for derived variables

    Every character other than ASCII letters, digits, "@" and "_" becomes
    an underscore, e.g. "lib/libfrr.la" -> "lib_libfrr_la", so that
    "<result>_LDADD" / "<result>_OBJECTS" name the target's variables.
    """
    return re.sub(r"[^A-Za-z0-9_@]", "_", s)
ba7eb55e | 345 | |
701a0192 | 346 | |
if __name__ == "__main__":
    # Script entry point: collect every program/library target of the build
    # from the automake variables, load the symbols of each target's object
    # files, then print the findings and write the HTML report.
    mv = MakeVars()

    in_build_tree = os.path.exists("config.version") and os.path.exists(
        "lib/.libs/libfrr.so"
    )
    if not in_build_tree:
        sys.stderr.write(
            "please execute this script in the root directory of an FRR build tree\n"
        )
        sys.stderr.write("./configure && make need to have completed successfully\n")
        sys.exit(1)

    amtargets = [
        "bin_PROGRAMS",
        "sbin_PROGRAMS",
        "lib_LTLIBRARIES",
        "module_LTLIBRARIES",
    ]

    # all targets named by those variables, with tools/ssd left out
    mv.getvars(amtargets)
    targets = []
    for amtarget in amtargets:
        for name in mv[amtarget].strip().split():
            if name != "tools/ssd":
                targets.append(name)

    # static archives (*.a) listed in a target's LDADD contribute object
    # files too, so their _OBJECTS variables are fetched as well; entries
    # starting with "-" are linker flags
    mv.getvars(["%s_LDADD" % automake_escape(t) for t in targets])
    ldobjs = targets[:]
    for t in targets:
        for entry in mv["%s_LDADD" % automake_escape(t)].strip().split():
            if not entry.startswith("-") and entry.endswith(".a"):
                ldobjs.append(entry)

    mv.getvars(["%s_OBJECTS" % automake_escape(o) for o in ldobjs])

    syms = Symbols()

    for t in targets:
        escaped = automake_escape(t)
        objs = mv["%s_OBJECTS" % escaped].strip().split()
        for entry in mv["%s_LDADD" % escaped].strip().split():
            if entry.startswith("-") or not entry.endswith(".a"):
                continue
            objs.extend(mv["%s_OBJECTS" % automake_escape(entry)].strip().split())

        sys.stderr.write("processing %s...\n" % t)
        sys.stderr.flush()
        syms.load(t, objs)

    syms.evaluate()

    # plain-text report on stdout, one section per object file
    for obj in sorted(syms.report):
        print("%s:" % obj)
        for report in syms.report[obj]:
            print("\t%r" % report)

    write_html_report(syms)