]>
Commit | Line | Data |
---|---|---|
9f95a23c | 1 | #!/usr/bin/env python3 |
11fdf7f2 TL |
2 | |
3 | import abc | |
4 | import argparse | |
20effc67 | 5 | import distutils.util |
11fdf7f2 TL |
6 | import enum |
7 | import functools | |
8 | import glob | |
9 | import itertools | |
9f95a23c | 10 | import logging |
1e59de90 | 11 | import math |
11fdf7f2 TL |
12 | import multiprocessing |
13 | import os | |
14 | import pathlib | |
15 | import pyudev | |
16 | import re | |
17 | import shutil | |
18 | import subprocess | |
19 | import sys | |
20 | import urllib.request | |
21 | import yaml | |
9f95a23c TL |
22 | import platform |
23 | import shlex | |
11fdf7f2 | 24 | |
9f95a23c TL |
# Global dry-run flag: when True, commands are echoed instead of executed and
# log output is emitted as shell comments.
dry_run_mode = False

def perftune_print(log_msg, *args, **kwargs):
    """Print a log message; in dry-run mode prefix it with '# ' so the output stays a valid shell script."""
    prefix = "# " if dry_run_mode else ""
    print(prefix + log_msg, *args, **kwargs)
30 | ||
def __run_one_command(prog_args, stderr=None, check=True):
    """
    Execute a command and return its stdout decoded as UTF-8.

    :param prog_args: argv-style list for the command
    :param stderr: where to direct the child's stderr (passed to Popen)
    :param check: if True - raise CalledProcessError on a non-zero exit status
    :return: the command's stdout as a str
    """
    proc = subprocess.Popen(prog_args, stdout = subprocess.PIPE, stderr = stderr)
    raw_out, raw_err = proc.communicate()
    decoded_out = str(raw_out, 'utf-8')

    if check and proc.returncode != 0:
        raise subprocess.CalledProcessError(returncode=proc.returncode, cmd=" ".join(prog_args), output=decoded_out, stderr=raw_err)

    return decoded_out
40 | ||
9f95a23c TL |
def run_one_command(prog_args, stderr=None, check=True):
    """
    Execute a state-changing command.

    In dry-run mode the command is not executed: it is printed as a single
    shell-quoted line instead.
    """
    if not dry_run_mode:
        __run_one_command(prog_args, stderr=stderr, check=check)
    else:
        print(" ".join(shlex.quote(arg) for arg in prog_args))
46 | ||
def run_read_only_command(prog_args, stderr=None, check=True):
    # Read-only commands don't modify system state, so they are executed even
    # in dry-run mode (their output is needed to compute masks etc.).
    return __run_one_command(prog_args, stderr=stderr, check=check)
49 | ||
11fdf7f2 TL |
def run_hwloc_distrib(prog_args):
    """
    Returns a list of strings - each representing a single line of hwloc-distrib output.
    """
    output = run_read_only_command(['hwloc-distrib'] + prog_args)
    return output.splitlines()
11fdf7f2 TL |
55 | |
def run_hwloc_calc(prog_args):
    """
    Returns a single string with the result of the execution.
    """
    output = run_read_only_command(['hwloc-calc'] + prog_args)
    return output.rstrip()
11fdf7f2 | 61 | |
20effc67 TL |
def run_ethtool(prog_args):
    """
    Returns a list of strings - each representing a single line of ethtool output.
    """
    output = run_read_only_command(['ethtool'] + prog_args)
    return output.splitlines()
67 | ||
def fwriteln(fname, line, log_message, log_errors=True):
    """
    Write a single line into the given file, printing log_message on success.

    In dry-run mode nothing is written - the equivalent shell command is
    printed instead.

    :param fname: path of the file to write into
    :param line: content to write
    :param log_message: message printed on a successful write (also used as a
                        prefix in the failure diagnostic)
    :param log_errors: if True - print a diagnostic when the write fails
    """
    try:
        if dry_run_mode:
            print("echo {} > {}".format(line, fname))
            return
        else:
            with open(fname, 'w') as f:
                f.write(line)
            print(log_message)
    # Was a bare 'except:', which would also swallow SystemExit and
    # KeyboardInterrupt; catch only real errors.
    except Exception:
        if log_errors:
            print("{}: failed to write into {}: {}".format(log_message, fname, sys.exc_info()))
11fdf7f2 TL |
80 | |
def readlines(fname):
    """
    Return the list of lines of the given file, or an empty list if the file
    can't be read (the failure is logged to stdout).

    :param fname: path of the file to read
    :return: list of lines (each keeping its trailing newline), [] on failure
    """
    try:
        with open(fname, 'r') as f:
            return f.readlines()
    # Was a bare 'except:', which would also swallow SystemExit and
    # KeyboardInterrupt; catch only real errors.
    except Exception:
        print("Failed to read {}: {}".format(fname, sys.exc_info()))
        return []
88 | ||
9f95a23c TL |
def fwriteln_and_log(fname, line, log_errors=True):
    """Write 'line' into 'fname' with a standard "Writing '<line>' to <fname>" log message."""
    fwriteln(fname, line,
             log_message="Writing '{}' to {}".format(line, fname),
             log_errors=log_errors)
11fdf7f2 TL |
92 | |
# Matches empty components in a comma-separated hwloc mask, e.g. "0xff,,0xff".
double_commas_pattern = re.compile(',,')

def set_one_mask(conf_file, mask, log_errors=True):
    """
    Write a CPU mask into the given configuration file.

    The mask is normalized first: '0x' prefixes are stripped and every empty
    component (",,") is rewritten as an explicit zero (",0,").

    :param conf_file: file to write the mask into (must already exist)
    :param mask: hwloc-style CPU mask string
    :param log_errors: if True - report write failures
    """
    if not os.path.exists(conf_file):
        raise Exception("Configure file to set mask doesn't exist: {}".format(conf_file))

    normalized = mask.replace('0x', '')
    while double_commas_pattern.search(normalized):
        normalized = double_commas_pattern.sub(',0,', normalized)

    fwriteln(conf_file, normalized,
             log_message="Setting mask {} in {}".format(normalized, conf_file),
             log_errors=log_errors)
11fdf7f2 | 105 | |
def distribute_irqs(irqs, cpu_mask, log_errors=True):
    """
    Distribute the given IRQs one-per-CPU across the CPUs of cpu_mask by
    writing each IRQ's smp_affinity file.

    :param irqs: list of IRQ numbers (strings)
    :param cpu_mask: hwloc CPU mask restricting where the IRQs may land
    :param log_errors: if True - report affinity-write failures
    """
    # Nothing to distribute
    if not irqs:
        return

    affinity_masks = run_hwloc_distrib(["{}".format(len(irqs)), '--single', '--restrict', cpu_mask])
    for idx, affinity_mask in enumerate(affinity_masks):
        set_one_mask("/proc/irq/{}/smp_affinity".format(irqs[idx]), affinity_mask, log_errors=log_errors)
11fdf7f2 TL |
113 | |
def is_process_running(name):
    """Return True iff 'ps' lists at least one non-defunct process with the given name."""
    ps_output = run_read_only_command(['ps', '--no-headers', '-C', name], check=False)
    live_lines = [ps_line for ps_line in ps_output.splitlines() if not re.search('<defunct>', ps_line)]
    return len(live_lines) > 0
11fdf7f2 TL |
116 | |
def restart_irqbalance(banned_irqs):
    """
    Restart irqbalance if it's running and ban it from moving the IRQs from the
    given list.

    The ban list is persisted by rewriting the irqbalance configuration file
    (its location and the option key name depend on the distribution), after
    saving a one-time backup copy with a '.scylla.orig' suffix.

    :param banned_irqs: iterable of IRQ numbers (strings) to ban
    """
    config_file = '/etc/default/irqbalance'
    options_key = 'OPTIONS'
    systemd = False
    banned_irqs_list = list(banned_irqs)

    # If there is nothing to ban - quit
    if not banned_irqs_list:
        return

    # return early if irqbalance is not running
    if not is_process_running('irqbalance'):
        perftune_print("irqbalance is not running")
        return

    # If this file exists - this a "new (systemd) style" irqbalance packaging.
    # This type of packaging uses IRQBALANCE_ARGS as an option key name, "old (init.d) style"
    # packaging uses an OPTION key.
    if os.path.exists('/lib/systemd/system/irqbalance.service') or \
        os.path.exists('/usr/lib/systemd/system/irqbalance.service'):
        options_key = 'IRQBALANCE_ARGS'
        systemd = True

    # Fall back to distribution-specific config locations (RH-family, Gentoo).
    if not os.path.exists(config_file):
        if os.path.exists('/etc/sysconfig/irqbalance'):
            config_file = '/etc/sysconfig/irqbalance'
        elif os.path.exists('/etc/conf.d/irqbalance'):
            config_file = '/etc/conf.d/irqbalance'
            options_key = 'IRQBALANCE_OPTS'
            # PID 1's name tells us whether this box boots with systemd.
            with open('/proc/1/comm', 'r') as comm:
                systemd = 'systemd' in comm.read()
        else:
            perftune_print("Unknown system configuration - not restarting irqbalance!")
            perftune_print("You have to prevent it from moving IRQs {} manually!".format(banned_irqs_list))
            return

    orig_file = "{}.scylla.orig".format(config_file)

    # Save the original file
    if not dry_run_mode:
        if not os.path.exists(orig_file):
            print("Saving the original irqbalance configuration is in {}".format(orig_file))
            shutil.copyfile(config_file, orig_file)
        else:
            print("File {} already exists - not overwriting.".format(orig_file))

    # Read the config file lines
    cfile_lines = open(config_file, 'r').readlines()

    # Build the new config_file contents with the new options configuration
    perftune_print("Restarting irqbalance: going to ban the following IRQ numbers: {} ...".format(", ".join(banned_irqs_list)))

    # Search for the original options line
    opt_lines = list(filter(lambda line : re.search("^\s*{}".format(options_key), line), cfile_lines))
    if not opt_lines:
        new_options = "{}=\"".format(options_key)
    elif len(opt_lines) == 1:
        # cut the last "
        new_options = re.sub("\"\s*$", "", opt_lines[0].rstrip())
        # NOTE(review): opt_lines is repurposed here from a list to a string;
        # below it is only used for truthiness in the dry-run branch.
        opt_lines = opt_lines[0].strip()
    else:
        raise Exception("Invalid format in {}: more than one lines with {} key".format(config_file, options_key))

    for irq in banned_irqs_list:
        # prevent duplicate "ban" entries for the same IRQ
        patt_str = "\-\-banirq\={}\Z|\-\-banirq\={}\s".format(irq, irq)
        if not re.search(patt_str, new_options):
            new_options += " --banirq={}".format(irq)

    # Close the quoted option value
    new_options += "\""

    if dry_run_mode:
        # Emit shell commands equivalent to the rewrite performed below.
        if opt_lines:
            print("sed -i 's/^{}/#{}/g' {}".format(options_key, options_key, config_file))
        print("echo {} | tee -a {}".format(new_options, config_file))
    else:
        # Rewrite the config file: keep every line except the old options line,
        # then append the new options line at the end.
        with open(config_file, 'w') as cfile:
            for line in cfile_lines:
                if not re.search("^\s*{}".format(options_key), line):
                    cfile.write(line)

            cfile.write(new_options + "\n")

    if systemd:
        perftune_print("Restarting irqbalance via systemctl...")
        run_one_command(['systemctl', 'try-restart', 'irqbalance'])
    else:
        perftune_print("Restarting irqbalance directly (init.d)...")
        run_one_command(['/etc/init.d/irqbalance', 'restart'])
210 | ||
def learn_irqs_from_proc_interrupts(pattern, irq2procline):
    """
    Return the IRQ numbers whose /proc/interrupts line matches the given pattern.

    :param pattern: regex searched inside each /proc/interrupts line
    :param irq2procline: map of IRQ number (string) to its /proc/interrupts line
    :return: list of matching IRQ numbers, in map iteration order
    """
    return [irq for irq, proc_line in irq2procline.items() if re.search(pattern, proc_line)]
213 | ||
def learn_all_irqs_one(irq_conf_dir, irq2procline, xen_dev_name):
    """
    Returns a list of IRQs of a single device.

    irq_conf_dir: a /sys/... directory with the IRQ information for the given device
    irq2procline: a map of IRQs to the corresponding lines in the /proc/interrupts
    xen_dev_name: a device name pattern as it appears in the /proc/interrupts on Xen systems

    IRQ discovery strategies are tried in order: MSI directory, legacy 'irq'
    file, then /proc/interrupts matching for virtio and Xen devices.
    """
    msi_irqs_dir_name = os.path.join(irq_conf_dir, 'msi_irqs')
    # Device uses MSI IRQs: each directory entry name is an IRQ number.
    if os.path.exists(msi_irqs_dir_name):
        return os.listdir(msi_irqs_dir_name)

    irq_file_name = os.path.join(irq_conf_dir, 'irq')
    # Device uses INT#x: the 'irq' file holds the IRQ number(s), one per line.
    if os.path.exists(irq_file_name):
        return [ line.lstrip().rstrip() for line in open(irq_file_name, 'r').readlines() ]

    # No irq file detected - fall back to identifying the device by its modalias.
    modalias = open(os.path.join(irq_conf_dir, 'modalias'), 'r').readline()

    # virtio case: collect the IRQs of every 'virtio*' directory under the
    # device's driver directory by matching those names in /proc/interrupts.
    if re.search("^virtio", modalias):
        return list(itertools.chain.from_iterable(
            map(lambda dirname : learn_irqs_from_proc_interrupts(dirname, irq2procline),
                filter(lambda dirname : re.search('virtio', dirname),
                       itertools.chain.from_iterable([ dirnames for dirpath, dirnames, filenames in os.walk(os.path.join(irq_conf_dir, 'driver')) ])))))

    # xen case: match the caller-provided device name pattern in /proc/interrupts.
    if re.search("^xen:", modalias):
        return learn_irqs_from_proc_interrupts(xen_dev_name, irq2procline)

    # Unknown device type - no IRQs found.
    return []
247 | ||
def get_irqs2procline_map():
    """Parse /proc/interrupts into a map of IRQ number (string) -> full line."""
    irq2line = {}
    for line in open('/proc/interrupts', 'r').readlines():
        irq2line[line.split(':')[0].lstrip().rstrip()] = line
    return irq2line
250 | ||
1e59de90 TL |
251 | |
class AutodetectError(Exception):
    # Raised when the IRQ CPU mask can't be auto-detected (e.g. asymmetric
    # NUMA configuration) and the user must provide it explicitly.
    pass
254 | ||
255 | ||
def auto_detect_irq_mask(cpu_mask, cores_per_irq_core):
    """
    The logic of auto-detection of what was once a 'mode' is generic and is all about the amount of CPUs and NUMA
    nodes that are present and a restricting 'cpu_mask'.
    This function implements this logic:

    * up to 4 CPU threads: use 'cpu_mask'
    * up to 4 CPU cores (on x86 this would translate to 8 CPU threads): use a single CPU thread out of allowed
    * up to 16 CPU cores: use a single CPU core out of allowed
    * more than 16 CPU cores: use a single CPU core for each 16 CPU cores and distribute them evenly among all
      present NUMA nodes.

    An AutodetectError exception is raised if 'cpu_mask' is defined in a way that there is a different number of threads
    and/or cores among different NUMA nodes. In such a case a user needs to provide
    an IRQ CPUs definition explicitly using 'irq_cpu_mask' parameter.

    :param cpu_mask: CPU mask that defines which out of present CPUs can be considered for tuning
    :param cores_per_irq_core number of cores to allocate a single IRQ core out of, e.g. 6 means allocate a single IRQ
           core out of every 6 CPU cores.
    :return: CPU mask to bind IRQs to, a.k.a. irq_cpu_mask
    """
    cores_key = 'cores'
    PUs_key = 'PUs'

    # List of NUMA IDs that own CPUs from the given CPU mask
    numa_ids_list = run_hwloc_calc(['-I', 'numa', cpu_mask]).split(",")

    # Let's calculate number of HTs and cores on each NUMA node belonging to the given CPU set
    cores_PUs_per_numa = {} # { <numa_id> : {'cores': <number of cores>, 'PUs': <number of PUs>}}
    for n in numa_ids_list:
        num_cores = int(run_hwloc_calc(['--restrict', cpu_mask, '--number-of', 'core', f'numa:{n}']))
        num_PUs = int(run_hwloc_calc(['--restrict', cpu_mask, '--number-of', 'PU', f'numa:{n}']))
        cores_PUs_per_numa[n] = {cores_key: num_cores, PUs_key: num_PUs}

    # Let's check if configuration on each NUMA is the same. If it's not then we can't auto-detect the IRQs CPU set
    # and a user needs to provide it explicitly
    num_cores0 = cores_PUs_per_numa[numa_ids_list[0]][cores_key]
    num_PUs0 = cores_PUs_per_numa[numa_ids_list[0]][PUs_key]
    for n in numa_ids_list:
        if cores_PUs_per_numa[n][cores_key] != num_cores0 or cores_PUs_per_numa[n][PUs_key] != num_PUs0:
            raise AutodetectError(f"NUMA{n} has a different configuration from NUMA0 for a given CPU mask {cpu_mask}: "
                                  f"{cores_PUs_per_numa[n][cores_key]}:{cores_PUs_per_numa[n][PUs_key]} vs "
                                  f"{num_cores0}:{num_PUs0}. Auto-detection of IRQ CPUs in not possible. "
                                  f"Please, provide irq_cpu_mask explicitly.")

    # Auto-detection of IRQ CPU set is possible - let's get to it!
    #
    # Total counts for the whole machine
    num_cores = int(run_hwloc_calc(['--restrict', cpu_mask, '--number-of', 'core', 'machine:0']))
    num_PUs = int(run_hwloc_calc(['--restrict', cpu_mask, '--number-of', 'PU', 'machine:0']))

    if num_PUs <= 4:
        # Tiny machine: don't sacrifice anything, share all CPUs with IRQs.
        return cpu_mask
    elif num_cores <= 4:
        # Small machine: a single hyper-thread is enough for IRQs.
        return run_hwloc_calc(['--restrict', cpu_mask, 'PU:0'])
    elif num_cores <= cores_per_irq_core:
        # Medium machine: dedicate one full core to IRQs.
        return run_hwloc_calc(['--restrict', cpu_mask, 'core:0'])
    else:
        # Big machine.
        # Let's allocate a full core out of every cores_per_irq_core cores.
        # Let's distribute IRQ cores among present NUMA nodes
        num_irq_cores = math.ceil(num_cores / cores_per_irq_core)
        hwloc_args = []
        numa_cores_count = {n: 0 for n in numa_ids_list}
        added_cores = 0
        # Round-robin over the NUMA nodes, taking the next unused core from
        # each node in turn, until enough IRQ cores are collected.
        while added_cores < num_irq_cores:
            for numa in numa_ids_list:
                hwloc_args.append(f"node:{numa}.core:{numa_cores_count[numa]}")
                added_cores += 1
                numa_cores_count[numa] += 1

                if added_cores >= num_irq_cores:
                    break

        return run_hwloc_calc(['--restrict', cpu_mask] + hwloc_args)
331 | ||
332 | ||
11fdf7f2 TL |
333 | ################################################################################ |
class PerfTunerBase(metaclass=abc.ABCMeta):
    """
    Abstract base for the concrete tuners.

    Holds the common CPU-mask bookkeeping: the total CPU set ('cpu_mask'), the
    set dedicated to IRQ handling ('irqs_cpu_mask') and the remaining set for
    the application ('compute_cpu_mask'). Subclasses implement tune() and
    _get_irqs().
    """
    def __init__(self, args):
        self.__args = args
        # Normalize the user-provided mask via hwloc so it only contains CPUs
        # that actually exist on this machine.
        self.__args.cpu_mask = run_hwloc_calc(['--restrict', self.__args.cpu_mask, 'all'])
        self.__mode = None
        self.__compute_cpu_mask = None

        # Three ways to determine the IRQ/compute split, in priority order:
        # an explicit mode, an explicit IRQ CPU mask, or auto-detection.
        if self.args.mode:
            self.mode = PerfTunerBase.SupportedModes[self.args.mode]
        elif args.irq_cpu_mask:
            self.irqs_cpu_mask = args.irq_cpu_mask
        else:
            self.irqs_cpu_mask = auto_detect_irq_mask(self.cpu_mask, self.cores_per_irq_core)

        # Lazily evaluated by is_aws_i3_non_metal_instance.
        self.__is_aws_i3_nonmetal_instance = None

    #### Public methods ##########################
    class CPUMaskIsZeroException(Exception):
        """Thrown if CPU mask turns out to be zero"""
        pass

    class SupportedModes(enum.IntEnum):
        """
        Modes are ordered from the one that cuts the biggest number of CPUs
        from the compute CPUs' set to the one that takes the smallest ('mq' doesn't
        cut any CPU from the compute set).

        This fact is used when we calculate the 'common quotient' mode out of a
        given set of modes (e.g. default modes of different Tuners) - this would
        be the smallest among the given modes.
        """
        sq_split = 0
        sq = 1
        mq = 2

        # Note: no_irq_restrictions should always have the greatest value in the enum since it's the least restricting mode.
        no_irq_restrictions = 9999

        @staticmethod
        def names():
            return PerfTunerBase.SupportedModes.__members__.keys()

        @staticmethod
        def combine(modes):
            """
            :param modes: a set of modes of the PerfTunerBase.SupportedModes type
            :return: the mode that is the "common ground" for a given set of modes.
            """

            # Perform an explicit cast in order to verify that the values in the 'modes' are compatible with the
            # expected PerfTunerBase.SupportedModes type.
            return min([PerfTunerBase.SupportedModes(m) for m in modes])

    @staticmethod
    def cpu_mask_is_zero(cpu_mask):
        """
        The cpu_mask is a comma-separated list of 32-bit hex values with possibly omitted zero components,
        e.g. 0xffff,,0xffff
        We want to estimate if the whole mask is all-zeros.
        :param cpu_mask: hwloc-calc generated CPU mask
        :return: True if mask is zero, False otherwise
        """
        # An empty component stands for zero, so only non-empty non-zero
        # components make the mask non-zero.
        for cur_cpu_mask in cpu_mask.split(','):
            if cur_cpu_mask and int(cur_cpu_mask, 16) != 0:
                return False

        return True

    @staticmethod
    def compute_cpu_mask_for_mode(mq_mode, cpu_mask):
        """
        :param mq_mode: a PerfTunerBase.SupportedModes value (or castable to it)
        :param cpu_mask: the total CPU mask
        :return: the CPU mask for the application (compute) threads in this mode
        :raises CPUMaskIsZeroException: if the resulting mask is all-zeros
        """
        mq_mode = PerfTunerBase.SupportedModes(mq_mode)

        if mq_mode == PerfTunerBase.SupportedModes.sq:
            # all but CPU0
            compute_cpu_mask = run_hwloc_calc([cpu_mask, '~PU:0'])
        elif mq_mode == PerfTunerBase.SupportedModes.sq_split:
            # all but CPU0 and its HT siblings
            compute_cpu_mask = run_hwloc_calc([cpu_mask, '~core:0'])
        elif mq_mode == PerfTunerBase.SupportedModes.mq:
            # all available cores
            compute_cpu_mask = cpu_mask
        elif mq_mode == PerfTunerBase.SupportedModes.no_irq_restrictions:
            # all available cores
            compute_cpu_mask = cpu_mask
        else:
            raise Exception("Unsupported mode: {}".format(mq_mode))

        if PerfTunerBase.cpu_mask_is_zero(compute_cpu_mask):
            raise PerfTunerBase.CPUMaskIsZeroException("Bad configuration mode ({}) and cpu-mask value ({}): this results in a zero-mask for compute".format(mq_mode.name, cpu_mask))

        return compute_cpu_mask

    @staticmethod
    def irqs_cpu_mask_for_mode(mq_mode, cpu_mask):
        """
        :param mq_mode: a PerfTunerBase.SupportedModes value (or castable to it)
        :param cpu_mask: the total CPU mask
        :return: the CPU mask for IRQ handling in this mode
        :raises CPUMaskIsZeroException: if the resulting mask is all-zeros
        """
        mq_mode = PerfTunerBase.SupportedModes(mq_mode)
        irqs_cpu_mask = 0

        if mq_mode != PerfTunerBase.SupportedModes.mq and mq_mode != PerfTunerBase.SupportedModes.no_irq_restrictions:
            # IRQ CPUs are the complement of the compute CPUs within cpu_mask.
            irqs_cpu_mask = run_hwloc_calc([cpu_mask, "~{}".format(PerfTunerBase.compute_cpu_mask_for_mode(mq_mode, cpu_mask))])
        else: # mq_mode == PerfTunerBase.SupportedModes.mq or mq_mode == PerfTunerBase.SupportedModes.no_irq_restrictions
            # distribute equally between all available cores
            irqs_cpu_mask = cpu_mask

        if PerfTunerBase.cpu_mask_is_zero(irqs_cpu_mask):
            raise PerfTunerBase.CPUMaskIsZeroException("Bad configuration mode ({}) and cpu-mask value ({}): this results in a zero-mask for IRQs".format(mq_mode.name, cpu_mask))

        return irqs_cpu_mask

    @property
    def mode(self):
        """
        Return the configuration mode
        """
        return self.__mode

    @mode.setter
    def mode(self, new_mode):
        """
        Set the new configuration mode and recalculate the corresponding masks.
        """
        # Make sure the new_mode is of PerfTunerBase.AllowedModes type
        self.__mode = PerfTunerBase.SupportedModes(new_mode)
        self.__compute_cpu_mask = PerfTunerBase.compute_cpu_mask_for_mode(self.__mode, self.__args.cpu_mask)
        self.__irq_cpu_mask = PerfTunerBase.irqs_cpu_mask_for_mode(self.__mode, self.__args.cpu_mask)

    @property
    def cpu_mask(self):
        """
        Return the CPU mask we operate on (the total CPU set)
        """

        return self.__args.cpu_mask

    @property
    def cores_per_irq_core(self):
        """
        Return the number of cores we are going to allocate a single IRQ core out of when auto-detecting
        """
        return self.__args.cores_per_irq_core

    @staticmethod
    def min_cores_per_irq_core():
        """
        A minimum value of cores_per_irq_core.
        We don't allocate a full IRQ core if total number of CPU cores is less or equal to 4.
        """
        return 5

    @property
    def compute_cpu_mask(self):
        """
        Return the CPU mask to use for seastar application binding.
        """
        return self.__compute_cpu_mask

    @property
    def irqs_cpu_mask(self):
        """
        Return the mask of CPUs used for IRQs distribution.
        """
        return self.__irq_cpu_mask

    @irqs_cpu_mask.setter
    def irqs_cpu_mask(self, new_irq_cpu_mask):
        """
        Set the IRQ CPU mask and derive the compute CPU mask from it.
        :raises CPUMaskIsZeroException: if either resulting mask is all-zeros
        """
        self.__irq_cpu_mask = new_irq_cpu_mask

        # Sanity check
        if PerfTunerBase.cpu_mask_is_zero(self.__irq_cpu_mask):
            raise PerfTunerBase.CPUMaskIsZeroException("Bad configuration: zero IRQ CPU mask is given")

        if run_hwloc_calc([self.__irq_cpu_mask]) == run_hwloc_calc([self.cpu_mask]):
            # Special case: if IRQ CPU mask is the same as total CPU mask - set a Compute CPU mask to cpu_mask
            self.__compute_cpu_mask = self.cpu_mask
        else:
            # Otherwise, a Compute CPU mask is a CPU mask without IRQ CPU mask bits
            self.__compute_cpu_mask = run_hwloc_calc([self.cpu_mask, f"~{self.__irq_cpu_mask}"])

        # Sanity check
        if PerfTunerBase.cpu_mask_is_zero(self.__compute_cpu_mask):
            raise PerfTunerBase.CPUMaskIsZeroException(
                f"Bad configuration: cpu_maks:{self.cpu_mask}, irq_cpu_mask:{self.__irq_cpu_mask}: "
                f"results in a zero-mask for compute")

    @property
    def is_aws_i3_non_metal_instance(self):
        """
        :return: True if we are running on the AWS i3.nonmetal instance, e.g. i3.4xlarge
        """
        if self.__is_aws_i3_nonmetal_instance is None:
            self.__check_host_type()

        return self.__is_aws_i3_nonmetal_instance

    @property
    def args(self):
        # The (possibly normalized) argparse namespace this tuner was built from.
        return self.__args

    @property
    def irqs(self):
        # Delegates to the subclass-provided _get_irqs().
        return self._get_irqs()

    #### "Protected"/Public (pure virtual) methods ###########
    @abc.abstractmethod
    def tune(self):
        """
        Performs the actual tuning. Implemented by subclasses.
        """
        pass

    @abc.abstractmethod
    def _get_irqs(self):
        """
        Return the iterable value with all IRQs to be configured.
        """
        pass

    #### Private methods ############################
    def __check_host_type(self):
        """
        Check if we are running on the AWS i3 nonmetal instance.
        If yes, set self.__is_aws_i3_nonmetal_instance to True, and to False otherwise.
        """
        try:
            # Short timeout: off AWS this address is unreachable and we don't
            # want to stall the tuner.
            aws_instance_type = urllib.request.urlopen("http://169.254.169.254/latest/meta-data/instance-type", timeout=0.1).read().decode()
            if re.match(r'^i3\.((?!metal)\w)+$', aws_instance_type):
                self.__is_aws_i3_nonmetal_instance = True
            else:
                self.__is_aws_i3_nonmetal_instance = False

            return
        except (urllib.error.URLError, ConnectionError, TimeoutError):
            # Non-AWS case
            pass
        except:
            logging.warning("Unexpected exception while attempting to access AWS meta server: {}".format(sys.exc_info()[0]))

        self.__is_aws_i3_nonmetal_instance = False
569 | ||
11fdf7f2 TL |
570 | ################################################# |
571 | class NetPerfTuner(PerfTunerBase): | |
    def __init__(self, args):
        """
        :param args: argparse namespace; args.nics lists the interfaces to tune
        """
        super().__init__(args)

        self.nics=args.nics

        # Per-NIC maps computed up-front: bonding flags and slave lists.
        self.__nic_is_bond_iface = self.__check_dev_is_bond_iface()
        self.__slaves = self.__learn_slaves()

        # check that self.nics contain a HW device or a bonding interface
        self.__check_nics()

        # Fetch IRQs related info
        self.__get_irqs_info()
11fdf7f2 | 585 | |
20effc67 | 586 | |
11fdf7f2 TL |
587 | #### Public methods ############################ |
    def tune(self):
        """
        Tune the networking server configuration.

        Configures each NIC (physical or bonding) and then bumps the kernel
        listen/SYN backlog limits.
        """
        for nic in self.nics:
            if self.nic_is_hw_iface(nic):
                perftune_print("Setting a physical interface {}...".format(nic))
                self.__setup_one_hw_iface(nic)
            else:
                perftune_print("Setting {} bonding interface...".format(nic))
                self.__setup_bonding_iface(nic)

        # Increase the socket listen() backlog
        fwriteln_and_log('/proc/sys/net/core/somaxconn', '4096')

        # Increase the maximum number of remembered connection requests that
        # have not yet received an acknowledgment from the connecting client.
        fwriteln_and_log('/proc/sys/net/ipv4/tcp_max_syn_backlog', '4096')
606 | ||
20effc67 TL |
    def nic_is_bond_iface(self, nic):
        # True if the given NIC was detected as a bonding master at init time.
        return self.__nic_is_bond_iface[nic]
11fdf7f2 | 609 | |
20effc67 TL |
    def nic_exists(self, nic):
        # True if the interface is present under /sys/class/net.
        return self.__iface_exists(nic)
f67539c2 | 612 | |
20effc67 TL |
    def nic_is_hw_iface(self, nic):
        # True if the interface is backed by a real device (has a 'device' node).
        return self.__dev_is_hw_iface(nic)
11fdf7f2 | 615 | |
    def slaves(self, nic):
        """
        Returns an iterator for all slaves of the nic.
        If args.nic is not a bonding interface an attempt to use the returned iterator
        will immediately raise a StopIteration exception - use __dev_is_bond_iface() check to avoid this.
        """
        return iter(self.__slaves[nic])
11fdf7f2 TL |
623 | |
624 | #### Protected methods ########################## | |
11fdf7f2 TL |
    def _get_irqs(self):
        """
        Returns the iterator for all IRQs that are going to be configured (according to args.nics parameter).
        For instance, for a bonding interface that's going to include IRQs of all its slaves.
        """
        return itertools.chain.from_iterable(self.__nic2irqs.values())
630 | return itertools.chain.from_iterable(self.__nic2irqs.values()) | |
631 | ||
632 | #### Private methods ############################ | |
1e59de90 TL |
    def __get_irqs_info(self):
        # Snapshot /proc/interrupts and resolve the IRQ list of every NIC.
        self.__irqs2procline = get_irqs2procline_map()
        self.__nic2irqs = self.__learn_irqs()
636 | ||
11fdf7f2 TL |
    @property
    def __rfs_table_size(self):
        # Size of the global RFS flow table (net.core.rps_sock_flow_entries).
        return 32768
640 | ||
    def __check_nics(self):
        """
        Checks that self.nics are supported interfaces

        :raises Exception: if a NIC doesn't exist or is a virtual device that
                           is neither physical nor a bonding interface
        """
        for nic in self.nics:
            if not self.nic_exists(nic):
                raise Exception("Device {} does not exist".format(nic))
            if not self.nic_is_hw_iface(nic) and not self.nic_is_bond_iface(nic):
                raise Exception("Not supported virtual device {}".format(nic))
11fdf7f2 TL |
650 | |
    def __get_irqs_one(self, iface):
        """
        Returns the list of IRQ numbers for the given interface.
        """
        return self.__nic2irqs[iface]
656 | ||
    def __setup_rfs(self, iface):
        """
        Configure Receive Flow Steering (RFS) for the given interface and
        enable/disable the NIC's ntuple filtering HW offload accordingly.
        """
        rps_limits = glob.glob("/sys/class/net/{}/queues/*/rps_flow_cnt".format(iface))
        # Split the global flow table evenly between the Rx queues.
        one_q_limit = int(self.__rfs_table_size / len(rps_limits))

        # If RFS feature is not present - get out
        try:
            run_one_command(['sysctl', 'net.core.rps_sock_flow_entries'])
        except:
            return

        # Enable RFS
        perftune_print("Setting net.core.rps_sock_flow_entries to {}".format(self.__rfs_table_size))
        run_one_command(['sysctl', '-w', 'net.core.rps_sock_flow_entries={}'.format(self.__rfs_table_size)])

        # Set each RPS queue limit
        for rfs_limit_cnt in rps_limits:
            msg = "Setting limit {} in {}".format(one_q_limit, rfs_limit_cnt)
            fwriteln(rfs_limit_cnt, "{}".format(one_q_limit), log_message=msg)

        # Enable/Disable ntuple filtering HW offload on the NIC. This is going to enable/disable aRFS on NICs supporting
        # aRFS since ntuple is pre-requisite for an aRFS feature.
        # If no explicit configuration has been requested enable ntuple (and thereby aRFS) only in MQ mode.
        #
        # aRFS acts similar to (SW) RFS: it places a TCP packet on a HW queue that it supposed to be "close" to an
        # application thread that sent a packet on the same TCP stream.
        #
        # For instance if a given TCP stream was sent from CPU3 then the next Rx packet is going to be placed in an Rx
        # HW queue which IRQ affinity is set to CPU3 or otherwise to the one with affinity close enough to CPU3.
        #
        # Read more here: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/performance_tuning_guide/network-acc-rfs
        #
        # Obviously it would achieve the best result if there is at least one Rx HW queue with an affinity set to each
        # application threads that handle TCP.
        #
        # And, similarly, if we know in advance that there won't be any such HW queue (sq and sq_split modes) - there is
        # no sense enabling aRFS.
        op = "Enable"
        value = 'on'

        # Disable when explicitly requested, or by default when IRQs share the
        # whole CPU set (the "mq-like" case where no HW queue maps to a compute CPU).
        if (self.args.enable_arfs is None and self.irqs_cpu_mask == self.cpu_mask) or self.args.enable_arfs is False:
            op = "Disable"
            value = 'off'

        ethtool_msg = "{} ntuple filtering HW offload for {}...".format(op, iface)

        if dry_run_mode:
            perftune_print(ethtool_msg)
            run_one_command(['ethtool','-K', iface, 'ntuple', value], stderr=subprocess.DEVNULL)
        else:
            try:
                print("Trying to {} ntuple filtering HW offload for {}...".format(op.lower(), iface), end='')
                run_one_command(['ethtool','-K', iface, 'ntuple', value], stderr=subprocess.DEVNULL)
                print("ok")
            except:
                print("not supported")
11fdf7f2 TL |
712 | |
713 | def __setup_rps(self, iface, mask): | |
714 | for one_rps_cpus in self.__get_rps_cpus(iface): | |
715 | set_one_mask(one_rps_cpus, mask) | |
716 | ||
717 | self.__setup_rfs(iface) | |
718 | ||
719 | def __setup_xps(self, iface): | |
720 | xps_cpus_list = glob.glob("/sys/class/net/{}/queues/*/xps_cpus".format(iface)) | |
721 | masks = run_hwloc_distrib(["{}".format(len(xps_cpus_list))]) | |
722 | ||
723 | for i, mask in enumerate(masks): | |
724 | set_one_mask(xps_cpus_list[i], mask) | |
725 | ||
f67539c2 TL |
726 | def __iface_exists(self, iface): |
727 | if len(iface) == 0: | |
728 | return False | |
729 | return os.path.exists("/sys/class/net/{}".format(iface)) | |
730 | ||
11fdf7f2 TL |
731 | def __dev_is_hw_iface(self, iface): |
732 | return os.path.exists("/sys/class/net/{}/device".format(iface)) | |
733 | ||
734 | def __check_dev_is_bond_iface(self): | |
20effc67 | 735 | bond_dict = {} |
11fdf7f2 | 736 | if not os.path.exists('/sys/class/net/bonding_masters'): |
20effc67 TL |
737 | for nic in self.nics: |
738 | bond_dict[nic] = False | |
739 | #return False for every nic | |
740 | return bond_dict | |
741 | for nic in self.nics: | |
742 | bond_dict[nic] = any([re.search(nic, line) for line in open('/sys/class/net/bonding_masters', 'r').readlines()]) | |
743 | return bond_dict | |
11fdf7f2 TL |
744 | |
745 | def __learn_slaves(self): | |
20effc67 TL |
746 | slaves_list_per_nic = {} |
747 | for nic in self.nics: | |
748 | if self.nic_is_bond_iface(nic): | |
749 | slaves_list_per_nic[nic] = list(itertools.chain.from_iterable([line.split() for line in open("/sys/class/net/{}/bonding/slaves".format(nic), 'r').readlines()])) | |
11fdf7f2 | 750 | |
20effc67 | 751 | return slaves_list_per_nic |
11fdf7f2 TL |
752 | |
753 | def __intel_irq_to_queue_idx(self, irq): | |
754 | """ | |
755 | Return the HW queue index for a given IRQ for Intel NICs in order to sort the IRQs' list by this index. | |
756 | ||
757 | Intel's fast path IRQs have the following name convention: | |
758 | <bla-bla>-TxRx-<queue index> | |
759 | ||
1e59de90 | 760 | Intel NICs also have the IRQ for Flow Director (which is not a regular fast path IRQ) whose name looks like |
11fdf7f2 TL |
761 | this: |
762 | <bla-bla>:fdir-TxRx-<index> | |
763 | ||
764 | We want to put the Flow Director's IRQ at the end of the sorted list of IRQs. | |
765 | ||
766 | :param irq: IRQ number | |
1e59de90 | 767 | :return: HW queue index for Intel NICs and sys.maxsize for all other NICs |
11fdf7f2 TL |
768 | """ |
769 | intel_fp_irq_re = re.compile("\-TxRx\-(\d+)") | |
770 | fdir_re = re.compile("fdir\-TxRx\-\d+") | |
771 | ||
772 | m = intel_fp_irq_re.search(self.__irqs2procline[irq]) | |
773 | m1 = fdir_re.search(self.__irqs2procline[irq]) | |
774 | if m and not m1: | |
775 | return int(m.group(1)) | |
776 | else: | |
777 | return sys.maxsize | |
778 | ||
20effc67 TL |
779 | def __mlx_irq_to_queue_idx(self, irq): |
780 | """ | |
781 | Return the HW queue index for a given IRQ for Mellanox NICs in order to sort the IRQs' list by this index. | |
782 | ||
783 | Mellanox NICs have the IRQ which name looks like | |
784 | this: | |
785 | mlx5_comp23 | |
786 | mlx5_comp<index> | |
787 | or this: | |
788 | mlx4-6 | |
789 | mlx4-<index> | |
790 | ||
791 | :param irq: IRQ number | |
1e59de90 | 792 | :return: HW queue index for Mellanox NICs and sys.maxsize for all other NICs |
20effc67 TL |
793 | """ |
794 | mlx5_fp_irq_re = re.compile("mlx5_comp(\d+)") | |
795 | mlx4_fp_irq_re = re.compile("mlx4\-(\d+)") | |
796 | ||
797 | m5 = mlx5_fp_irq_re.search(self.__irqs2procline[irq]) | |
798 | if m5: | |
799 | return int(m5.group(1)) | |
800 | else: | |
801 | m4 = mlx4_fp_irq_re.search(self.__irqs2procline[irq]) | |
802 | if m4: | |
803 | return int(m4.group(1)) | |
804 | ||
805 | return sys.maxsize | |
806 | ||
1e59de90 TL |
807 | def __virtio_irq_to_queue_idx(self, irq): |
808 | """ | |
809 | Return the HW queue index for a given IRQ for VIRTIO in order to sort the IRQs' list by this index. | |
810 | ||
811 | VIRTIO NICs have the IRQ's name that looks like this: | |
812 | Queue K of a device virtioY, where Y is some integer is comprised of 2 IRQs | |
813 | with following names: | |
814 | * Tx IRQ: | |
815 | virtioY-output.K | |
816 | * Rx IRQ: | |
817 | virtioY-input.K | |
818 | ||
819 | :param irq: IRQ number | |
820 | :return: HW queue index for VIRTIO fast path IRQ and sys.maxsize for all other IRQs | |
821 | """ | |
822 | virtio_fp_re = re.compile(r"virtio\d+-(input|output)\.(\d+)$") | |
823 | ||
824 | virtio_fp_irq = virtio_fp_re.search(self.__irqs2procline[irq]) | |
825 | if virtio_fp_irq: | |
826 | return int(virtio_fp_irq.group(2)) | |
827 | ||
828 | return sys.maxsize | |
829 | ||
830 | ||
20effc67 TL |
831 | def __get_driver_name(self, iface): |
832 | """ | |
833 | :param iface: Interface to check | |
834 | :return: driver name from ethtool | |
835 | """ | |
836 | ||
837 | driver_name = '' | |
838 | ethtool_i_lines = run_ethtool(['-i', iface]) | |
839 | driver_re = re.compile("driver:") | |
840 | driver_lines = list(filter(lambda one_line: driver_re.search(one_line), ethtool_i_lines)) | |
841 | ||
842 | if driver_lines: | |
843 | if len(driver_lines) > 1: | |
844 | raise Exception("More than one 'driver:' entries in the 'ethtool -i {}' output. Unable to continue.".format(iface)) | |
845 | ||
846 | driver_name = driver_lines[0].split()[1].strip() | |
847 | ||
848 | return driver_name | |
849 | ||
11fdf7f2 TL |
850 | def __learn_irqs_one(self, iface): |
851 | """ | |
852 | This is a slow method that is going to read from the system files. Never | |
853 | use it outside the initialization code. Use __get_irqs_one() instead. | |
854 | ||
855 | Filter the fast path queues IRQs from the __get_all_irqs_one() result according to the known | |
856 | patterns. | |
857 | Right now we know about the following naming convention of the fast path queues vectors: | |
858 | - Intel: <bla-bla>-TxRx-<bla-bla> | |
859 | - Broadcom: <bla-bla>-fp-<bla-bla> | |
860 | - ena: <bla-bla>-Tx-Rx-<bla-bla> | |
20effc67 TL |
861 | - Mellanox: for mlx4 |
862 | mlx4-<queue idx>@<bla-bla> | |
863 | or for mlx5 | |
864 | mlx5_comp<queue idx>@<bla-bla> | |
1e59de90 | 865 | - VIRTIO: virtioN-[input|output].D |
11fdf7f2 TL |
866 | |
867 | So, we will try to filter the etries in /proc/interrupts for IRQs we've got from get_all_irqs_one() | |
868 | according to the patterns above. | |
869 | ||
870 | If as a result all IRQs are filtered out (if there are no IRQs with the names from the patterns above) then | |
871 | this means that the given NIC uses a different IRQs naming pattern. In this case we won't filter any IRQ. | |
872 | ||
873 | Otherwise, we will use only IRQs which names fit one of the patterns above. | |
874 | ||
875 | For NICs with a limited number of Rx queues the IRQs that handle Rx are going to be at the beginning of the | |
876 | list. | |
877 | """ | |
878 | # filter 'all_irqs' to only reference valid keys from 'irqs2procline' and avoid an IndexError on the 'irqs' search below | |
879 | all_irqs = set(learn_all_irqs_one("/sys/class/net/{}/device".format(iface), self.__irqs2procline, iface)).intersection(self.__irqs2procline.keys()) | |
1e59de90 TL |
880 | fp_irqs_re = re.compile("\-TxRx\-|\-fp\-|\-Tx\-Rx\-|mlx4-\d+@|mlx5_comp\d+@|virtio\d+-(input|output)") |
881 | irqs = sorted(list(filter(lambda irq : fp_irqs_re.search(self.__irqs2procline[irq]), all_irqs))) | |
11fdf7f2 | 882 | if irqs: |
1e59de90 | 883 | irqs.sort(key=self.__get_irq_to_queue_idx_functor(iface)) |
11fdf7f2 TL |
884 | return irqs |
885 | else: | |
886 | return list(all_irqs) | |
887 | ||
1e59de90 TL |
888 | def __get_irq_to_queue_idx_functor(self, iface): |
889 | """ | |
890 | Get a functor returning a queue index for a given IRQ. | |
891 | This functor is needed for NICs that are known to not release IRQs when the number of Rx | |
892 | channels is reduced or have extra IRQs for non-RSS channels. | |
893 | ||
894 | Therefore, for these NICs we need a functor that would allow us to pick IRQs that belong to channels that are | |
895 | going to handle TCP traffic: first X channels, where the value of X depends on the NIC's type and configuration. | |
896 | ||
897 | For others, e.g. ENA, or Broadcom, which are only going to allocate IRQs that belong to TCP handling channels, | |
898 | we don't really need to sort them as long as we filter fast path IRQs and distribute them evenly among IRQ CPUs. | |
899 | ||
900 | :param iface: NIC's interface name, e.g. eth19 | |
901 | :return: A functor that returns a queue index for a given IRQ if a mapping is known | |
902 | or a constant big integer value if mapping is unknown. | |
903 | """ | |
904 | # There are a few known drivers for which we know how to get a queue index from an IRQ name in /proc/interrupts | |
905 | driver_name = self.__get_driver_name(iface) | |
906 | ||
907 | # Every functor returns a sys.maxsize for an unknown driver IRQs. | |
908 | # So, choosing Intel's as a default is as good as any other. | |
909 | irq_to_idx_func = self.__intel_irq_to_queue_idx | |
910 | if driver_name.startswith("mlx"): | |
911 | irq_to_idx_func = self.__mlx_irq_to_queue_idx | |
912 | elif driver_name.startswith("virtio"): | |
913 | irq_to_idx_func = self.__virtio_irq_to_queue_idx | |
914 | ||
915 | return irq_to_idx_func | |
916 | ||
    def __irq_lower_bound_by_queue(self, iface, irqs, queue_idx):
        """
        Get the index of the first element in irqs array which queue is greater or equal to a given index.
        IRQs array is supposed to be sorted by queues numbers IRQs belong to.

        There are additional assumptions:
         * IRQs array items queue numbers are monotonically not decreasing, and if it increases then it increases by
           one.
         * Queue indexes are numbered starting from zero.

        :param irqs: IRQs array sorted by queues numbers IRQs belong to
        :param queue_idx: Queue index to partition by
        :return: The first index in the IRQs array that corresponds to a queue number greater or equal to a given index
                 which is at least queue_idx. If there is no such IRQ - returns len(irqs).
        """
        irq_to_idx_func = self.__get_irq_to_queue_idx_functor(iface)

        if queue_idx < len(irqs):
            # Because queue numbers grow by at most one per array element and start
            # at zero, the first IRQ whose queue >= queue_idx cannot appear before
            # position queue_idx - so the scan may safely start there.
            for idx in range(queue_idx, len(irqs)):
                if irq_to_idx_func(irqs[idx]) >= queue_idx:
                    return idx

        # No IRQ belongs to a queue >= queue_idx
        return len(irqs)
940 | ||
11fdf7f2 TL |
941 | def __learn_irqs(self): |
942 | """ | |
943 | This is a slow method that is going to read from the system files. Never | |
944 | use it outside the initialization code. | |
945 | """ | |
20effc67 TL |
946 | nic_irq_dict={} |
947 | for nic in self.nics: | |
948 | if self.nic_is_bond_iface(nic): | |
949 | for slave in filter(self.__dev_is_hw_iface, self.slaves(nic)): | |
950 | nic_irq_dict[slave] = self.__learn_irqs_one(slave) | |
951 | else: | |
952 | nic_irq_dict[nic] = self.__learn_irqs_one(nic) | |
953 | return nic_irq_dict | |
11fdf7f2 TL |
954 | |
955 | def __get_rps_cpus(self, iface): | |
956 | """ | |
957 | Prints all rps_cpus files names for the given HW interface. | |
958 | ||
959 | There is a single rps_cpus file for each RPS queue and there is a single RPS | |
960 | queue for each HW Rx queue. Each HW Rx queue should have an IRQ. | |
961 | Therefore the number of these files is equal to the number of fast path Rx IRQs for this interface. | |
962 | """ | |
963 | return glob.glob("/sys/class/net/{}/queues/*/rps_cpus".format(iface)) | |
964 | ||
1e59de90 TL |
965 | def __set_rx_channels_count(self, iface, count): |
966 | """ | |
967 | Try to set the number of Rx channels of a given interface to a given value. | |
968 | ||
969 | Rx channels of any NIC can be configured using 'ethtool -L' command using one of the following semantics: | |
970 | ||
971 | ethtool -L <iface> rx <count> | |
972 | or | |
973 | ethtool -L <iface> combined <count> | |
974 | ||
975 | If a specific semantics is not supported by a given NIC or if changing the number of channels is not supported | |
976 | ethtool is going to return an error. | |
977 | ||
978 | Instead of parsing and trying to detect which one of the following semantics a given interface supports we will | |
979 | simply try to use both semantics till either one of them succeeds or both fail. | |
980 | ||
981 | ||
982 | :param iface: NIC interface name, e.g. eth4 | |
983 | :param count: number of Rx channels we want to configure | |
984 | :return: True if configuration was successful, False otherwise | |
985 | """ | |
986 | options = ["rx", "combined"] | |
987 | for o in options: | |
988 | try: | |
989 | cmd = ['ethtool', '-L', iface, o, f"{count}"] | |
990 | perftune_print(f"Executing: {' '.join(cmd)}") | |
991 | run_one_command(cmd, stderr=subprocess.DEVNULL) | |
992 | return True | |
993 | except subprocess.CalledProcessError: | |
994 | pass | |
995 | ||
996 | return False | |
997 | ||
11fdf7f2 | 998 | def __setup_one_hw_iface(self, iface): |
1e59de90 TL |
999 | # Set Rx channels count to a number of IRQ CPUs unless an explicit count is given |
1000 | if self.args.num_rx_queues is not None: | |
1001 | num_rx_channels = self.args.num_rx_queues | |
1002 | else: | |
1003 | num_rx_channels = 0 | |
1004 | ||
1005 | # If a mask is wider than 32 bits it's going to be presented as a comma-separated list of 32-bit masks | |
1006 | # with possibly omitted zero components, e.g. 0x01,0x100,,0x12122 | |
1007 | for m in self.irqs_cpu_mask.split(","): | |
1008 | if m: | |
1009 | num_rx_channels += bin(int(m, 16)).count('1') | |
1010 | ||
1011 | # Let's try setting the number of Rx channels to the number of IRQ CPUs. | |
1012 | # | |
1013 | # If we were able to change the number of Rx channels the number of IRQs could have changed. | |
1014 | # In this case let's refresh IRQs info. | |
1015 | rx_channels_set = self.__set_rx_channels_count(iface, num_rx_channels) | |
1016 | if rx_channels_set: | |
1017 | self.__get_irqs_info() | |
1018 | ||
11fdf7f2 TL |
1019 | max_num_rx_queues = self.__max_rx_queue_count(iface) |
1020 | all_irqs = self.__get_irqs_one(iface) | |
1021 | ||
1022 | # Bind the NIC's IRQs according to the configuration mode | |
1023 | # | |
1024 | # If this NIC has a limited number of Rx queues then we want to distribute their IRQs separately. | |
1025 | # For such NICs we've sorted IRQs list so that IRQs that handle Rx are all at the head of the list. | |
1e59de90 | 1026 | if rx_channels_set or max_num_rx_queues < len(all_irqs): |
11fdf7f2 | 1027 | num_rx_queues = self.__get_rx_queue_count(iface) |
1e59de90 TL |
1028 | tcp_irqs_lower_bound = self.__irq_lower_bound_by_queue(iface, all_irqs, num_rx_queues) |
1029 | perftune_print(f"Distributing IRQs handling Rx and Tx for first {num_rx_queues} channels:") | |
1030 | distribute_irqs(all_irqs[0:tcp_irqs_lower_bound], self.irqs_cpu_mask) | |
9f95a23c | 1031 | perftune_print("Distributing the rest of IRQs") |
1e59de90 | 1032 | distribute_irqs(all_irqs[tcp_irqs_lower_bound:], self.irqs_cpu_mask) |
11fdf7f2 | 1033 | else: |
9f95a23c | 1034 | perftune_print("Distributing all IRQs") |
11fdf7f2 TL |
1035 | distribute_irqs(all_irqs, self.irqs_cpu_mask) |
1036 | ||
20effc67 | 1037 | self.__setup_rps(iface, self.cpu_mask) |
11fdf7f2 TL |
1038 | self.__setup_xps(iface) |
1039 | ||
20effc67 TL |
1040 | def __setup_bonding_iface(self, nic): |
1041 | for slave in self.slaves(nic): | |
11fdf7f2 | 1042 | if self.__dev_is_hw_iface(slave): |
9f95a23c | 1043 | perftune_print("Setting up {}...".format(slave)) |
11fdf7f2 TL |
1044 | self.__setup_one_hw_iface(slave) |
1045 | else: | |
9f95a23c | 1046 | perftune_print("Skipping {} (not a physical slave device?)".format(slave)) |
11fdf7f2 TL |
1047 | |
1048 | def __max_rx_queue_count(self, iface): | |
1049 | """ | |
1050 | :param iface: Interface to check | |
1051 | :return: The maximum number of RSS queues for the given interface if there is known limitation and sys.maxsize | |
1052 | otherwise. | |
1053 | ||
1054 | Networking drivers serving HW with the known maximum RSS queue limitation (due to lack of RSS bits): | |
1055 | ||
1056 | ixgbe: PF NICs support up to 16 RSS queues. | |
1057 | ixgbevf: VF NICs support up to 4 RSS queues. | |
1058 | i40e: PF NICs support up to 64 RSS queues. | |
1059 | i40evf: VF NICs support up to 16 RSS queues. | |
1060 | ||
1061 | """ | |
1062 | driver_to_max_rss = {'ixgbe': 16, 'ixgbevf': 4, 'i40e': 64, 'i40evf': 16} | |
1063 | ||
20effc67 | 1064 | driver_name = self.__get_driver_name(iface) |
11fdf7f2 TL |
1065 | return driver_to_max_rss.get(driver_name, sys.maxsize) |
1066 | ||
1067 | def __get_rx_queue_count(self, iface): | |
1068 | """ | |
1069 | :return: the RSS Rx queues count for the given interface. | |
1070 | """ | |
1071 | num_irqs = len(self.__get_irqs_one(iface)) | |
1072 | rx_queues_count = len(self.__get_rps_cpus(iface)) | |
1073 | ||
1074 | if rx_queues_count == 0: | |
1075 | rx_queues_count = num_irqs | |
1076 | ||
1077 | return min(self.__max_rx_queue_count(iface), rx_queues_count) | |
1078 | ||
11fdf7f2 | 1079 | |
11fdf7f2 | 1080 | |
9f95a23c TL |
class ClocksourceManager:
    """
    Verifies and enforces the preferred kernel clocksource for the detected
    "architecture": tsc on x86_64 and kvm-clock under a KVM hypervisor.
    """

    class PreferredClockSourceNotAvailableException(Exception):
        pass

    def __init__(self, args):
        self.__args = args
        # 'kvm' is a pseudo-architecture produced by _get_arch() when running
        # under a KVM hypervisor
        self._preferred = {"x86_64": "tsc", "kvm": "kvm-clock"}
        self._arch = self._get_arch()
        self._available_clocksources_file = "/sys/devices/system/clocksource/clocksource0/available_clocksource"
        self._current_clocksource_file = "/sys/devices/system/clocksource/clocksource0/current_clocksource"
        # Fixed typo in the user-facing message: "withe the" -> "with the"
        self._recommendation_if_unavailable = { "x86_64": "The tsc clocksource is not available. Consider using a hardware platform where the tsc clocksource is available, or try forcing it with the tsc=reliable boot option", "kvm": "kvm-clock is not available" }

    def _available_clocksources(self):
        # Single-line sysfs file holding space-separated clocksource names
        with open(self._available_clocksources_file) as f:
            return f.readline().split()

    def _current_clocksource(self):
        with open(self._current_clocksource_file) as f:
            return f.readline().strip()

    def _get_arch(self):
        """Return 'kvm' when running under a KVM hypervisor, otherwise the machine architecture."""
        try:
            virt = run_read_only_command(['systemd-detect-virt']).strip()
            if virt == "kvm":
                return virt
        except (OSError, subprocess.CalledProcessError):
            # systemd-detect-virt is missing or failed - fall back to the real architecture
            pass
        return platform.machine()

    def enforce_preferred_clocksource(self):
        fwriteln(self._current_clocksource_file, self._preferred[self._arch], "Setting clocksource to {}".format(self._preferred[self._arch]))

    def preferred(self):
        return self._preferred[self._arch]

    def setting_available(self):
        # True iff we know a preferred clocksource for this architecture
        return self._arch in self._preferred

    def preferred_clocksource_available(self):
        return self._preferred[self._arch] in self._available_clocksources()

    def recommendation_if_unavailable(self):
        return self._recommendation_if_unavailable[self._arch]
1122 | ||
class SystemPerfTuner(PerfTunerBase):
    """System-wide tuning; currently covers only the kernel clocksource."""

    def __init__(self, args):
        super().__init__(args)
        self._clocksource_manager = ClocksourceManager(args)

    def tune(self):
        if not self.args.tune_clock:
            return

        if not self._clocksource_manager.setting_available():
            perftune_print("Clocksource setting not available or not needed for this architecture. Not tuning")
        elif not self._clocksource_manager.preferred_clocksource_available():
            perftune_print(self._clocksource_manager.recommendation_if_unavailable())
        else:
            self._clocksource_manager.enforce_preferred_clocksource()

#### Protected methods ##########################
    def _get_irqs(self):
        # No IRQs are involved in clocksource tuning
        return []
1141 | ||
11fdf7f2 TL |
1142 | ################################################# |
1143 | class DiskPerfTuner(PerfTunerBase): | |
    class SupportedDiskTypes(enum.IntEnum):
        # Disk classes that get different IRQ distribution policies (see tune())
        nvme = 0
        non_nvme = 1
1147 | ||
    def __init__(self, args):
        """
        Initialize disk tuning state.

        :param args: parsed command line arguments; must carry at least one of
                     'dirs' (directories to resolve to disks) or 'devs' (devices)
        :raises Exception: when neither directories nor devices were given
        """
        super().__init__(args)

        if not (self.args.dirs or self.args.devs):
            raise Exception("'disks' tuning was requested but neither directories nor storage devices were given")

        # NOTE: the order below matters - each step feeds the next one:
        # directories -> disks -> IRQs -> per-type grouping.
        self.__pyudev_ctx = pyudev.Context()
        self.__dir2disks = self.__learn_directories()
        self.__irqs2procline = get_irqs2procline_map()
        self.__disk2irqs = self.__learn_irqs()
        self.__type2diskinfo = self.__group_disks_info_by_type()

        # sets of devices that have already been tuned
        self.__io_scheduler_tuned_devs = set()
        self.__nomerges_tuned_devs = set()
        self.__write_back_cache_tuned_devs = set()
11fdf7f2 TL |
1164 | |
1165 | #### Public methods ############################# | |
1166 | def tune(self): | |
1167 | """ | |
1168 | Distribute IRQs according to the requested mode (args.mode): | |
1169 | - Distribute NVMe disks' IRQs equally among all available CPUs. | |
1170 | - Distribute non-NVMe disks' IRQs equally among designated CPUs or among | |
1171 | all available CPUs in the 'mq' mode. | |
1172 | """ | |
11fdf7f2 TL |
1173 | non_nvme_disks, non_nvme_irqs = self.__disks_info_by_type(DiskPerfTuner.SupportedDiskTypes.non_nvme) |
1174 | if non_nvme_disks: | |
9f95a23c | 1175 | perftune_print("Setting non-NVMe disks: {}...".format(", ".join(non_nvme_disks))) |
1e59de90 | 1176 | distribute_irqs(non_nvme_irqs, self.irqs_cpu_mask) |
11fdf7f2 TL |
1177 | self.__tune_disks(non_nvme_disks) |
1178 | else: | |
9f95a23c | 1179 | perftune_print("No non-NVMe disks to tune") |
11fdf7f2 TL |
1180 | |
1181 | nvme_disks, nvme_irqs = self.__disks_info_by_type(DiskPerfTuner.SupportedDiskTypes.nvme) | |
1182 | if nvme_disks: | |
9f95a23c TL |
1183 | # Linux kernel is going to use IRQD_AFFINITY_MANAGED mode for NVMe IRQs |
1184 | # on most systems (currently only AWS i3 non-metal are known to have a | |
1185 | # different configuration). SMP affinity of an IRQ in this mode may not be | |
1186 | # changed and an attempt to modify it is going to fail. However right now | |
1187 | # the only way to determine that IRQD_AFFINITY_MANAGED mode has been used | |
1188 | # is to attempt to modify IRQ SMP affinity (and fail) therefore we prefer | |
1189 | # to always do it. | |
1190 | # | |
1191 | # What we don't want however is to see annoying errors every time we | |
1192 | # detect that IRQD_AFFINITY_MANAGED was actually used. Therefore we will only log | |
1193 | # them in the "verbose" mode or when we run on an i3.nonmetal AWS instance. | |
1194 | perftune_print("Setting NVMe disks: {}...".format(", ".join(nvme_disks))) | |
1195 | distribute_irqs(nvme_irqs, self.args.cpu_mask, | |
1196 | log_errors=(self.is_aws_i3_non_metal_instance or self.args.verbose)) | |
11fdf7f2 TL |
1197 | self.__tune_disks(nvme_disks) |
1198 | else: | |
9f95a23c | 1199 | perftune_print("No NVMe disks to tune") |
11fdf7f2 TL |
1200 | |
1201 | #### Protected methods ########################## | |
11fdf7f2 TL |
1202 | def _get_irqs(self): |
1203 | return itertools.chain.from_iterable(irqs for disks, irqs in self.__type2diskinfo.values()) | |
1204 | ||
1205 | #### Private methods ############################ | |
    @property
    def __io_schedulers(self):
        """
        :return: An ordered list of IO schedulers that we want to configure. Schedulers are ordered by their priority
        from the highest (left most) to the lowest.
        """
        # "none" is the blk-mq no-op scheduler; "noop" is its legacy (single-queue) counterpart
        return ["none", "noop"]
1213 | ||
    @property
    def __nomerges(self):
        # Value written to the queue's 'nomerges' sysfs attribute;
        # '2' disables all request-merge attempts.
        return '2'
1217 | ||
20effc67 TL |
1218 | @property |
1219 | def __write_cache_config(self): | |
1220 | """ | |
1221 | :return: None - if write cache mode configuration is not requested or the corresponding write cache | |
1222 | configuration value string | |
1223 | """ | |
1224 | if self.args.set_write_back is None: | |
1225 | return None | |
1226 | ||
1227 | return "write back" if self.args.set_write_back else "write through" | |
1228 | ||
11fdf7f2 TL |
1229 | def __disks_info_by_type(self, disks_type): |
1230 | """ | |
1231 | Returns a tuple ( [<disks>], [<irqs>] ) for the given disks type. | |
1232 | IRQs numbers in the second list are promised to be unique. | |
1233 | """ | |
1234 | return self.__type2diskinfo[DiskPerfTuner.SupportedDiskTypes(disks_type)] | |
1235 | ||
1236 | def __nvme_fast_path_irq_filter(self, irq): | |
1237 | """ | |
1238 | Return True for fast path NVMe IRQs. | |
1239 | For NVMe device only queues 1-<number of CPUs> are going to do fast path work. | |
1240 | ||
1241 | NVMe IRQs have the following name convention: | |
1242 | nvme<device index>q<queue index>, e.g. nvme0q7 | |
1243 | ||
1244 | :param irq: IRQ number | |
1245 | :return: True if this IRQ is an IRQ of a FP NVMe queue. | |
1246 | """ | |
1247 | nvme_irq_re = re.compile(r'(\s|^)nvme\d+q(\d+)(\s|$)') | |
1248 | ||
1249 | # There may be more than an single HW queue bound to the same IRQ. In this case queue names are going to be | |
1e59de90 | 1250 | # comma separated |
11fdf7f2 TL |
1251 | split_line = self.__irqs2procline[irq].split(",") |
1252 | ||
1253 | for line in split_line: | |
1254 | m = nvme_irq_re.search(line) | |
1255 | if m and 0 < int(m.group(2)) <= multiprocessing.cpu_count(): | |
1256 | return True | |
1257 | ||
1258 | return False | |
1259 | ||
1260 | def __group_disks_info_by_type(self): | |
1261 | """ | |
1262 | Return a map of tuples ( [<disks>], [<irqs>] ), where "disks" are all disks of the specific type | |
1263 | and "irqs" are the corresponding IRQs. | |
1264 | ||
1265 | It's promised that every element is "disks" and "irqs" is unique. | |
1266 | ||
1267 | The disk types are 'nvme' and 'non-nvme' | |
1268 | """ | |
1269 | disks_info_by_type = {} | |
1270 | nvme_disks = set() | |
1271 | nvme_irqs = set() | |
1272 | non_nvme_disks = set() | |
1273 | non_nvme_irqs = set() | |
1274 | nvme_disk_name_pattern = re.compile('^nvme') | |
1275 | ||
1276 | for disk, irqs in self.__disk2irqs.items(): | |
1277 | if nvme_disk_name_pattern.search(disk): | |
1278 | nvme_disks.add(disk) | |
1279 | for irq in irqs: | |
1280 | nvme_irqs.add(irq) | |
1281 | else: | |
1282 | non_nvme_disks.add(disk) | |
1283 | for irq in irqs: | |
1284 | non_nvme_irqs.add(irq) | |
1285 | ||
1286 | if not (nvme_disks or non_nvme_disks): | |
1287 | raise Exception("'disks' tuning was requested but no disks were found") | |
1288 | ||
1289 | nvme_irqs = list(nvme_irqs) | |
1290 | ||
1291 | # There is a known issue with Xen hypervisor that exposes itself on AWS i3 instances where nvme module | |
1292 | # over-allocates HW queues and uses only queues 1,2,3,..., <up to number of CPUs> for data transfer. | |
1293 | # On these instances we will distribute only these queues. | |
9f95a23c TL |
1294 | |
1295 | if self.is_aws_i3_non_metal_instance: | |
1296 | nvme_irqs = list(filter(self.__nvme_fast_path_irq_filter, nvme_irqs)) | |
11fdf7f2 TL |
1297 | |
1298 | # Sort IRQs for easier verification | |
1299 | nvme_irqs.sort(key=lambda irq_num_str: int(irq_num_str)) | |
1300 | ||
1301 | disks_info_by_type[DiskPerfTuner.SupportedDiskTypes.nvme] = (list(nvme_disks), nvme_irqs) | |
1302 | disks_info_by_type[DiskPerfTuner.SupportedDiskTypes.non_nvme] = ( list(non_nvme_disks), list(non_nvme_irqs) ) | |
1303 | ||
1304 | return disks_info_by_type | |
1305 | ||
1306 | def __learn_directories(self): | |
1307 | return { directory : self.__learn_directory(directory) for directory in self.args.dirs } | |
1308 | ||
    def __learn_directory(self, directory, recur=False):
        """
        Returns a list of disks the given directory is mounted on (there will be more than one if
        the mount point is on the RAID volume)

        :param directory: directory to resolve
        :param recur: True on a recursive call (suppresses user-facing messages
                      for the intermediate mount point)
        """
        if not os.path.exists(directory):
            if not recur:
                perftune_print("{} doesn't exist - skipping".format(directory))

            return []

        try:
            udev_obj = pyudev.Devices.from_device_number(self.__pyudev_ctx, 'block', os.stat(directory).st_dev)
            return self.__get_phys_devices(udev_obj)
        except:
            # handle cases like ecryptfs where the directory is mounted to another directory and not to some block device
            # NOTE(review): the bare except is deliberate best-effort here - pyudev may
            # raise various errors for stacked/overlay filesystems.
            filesystem = run_read_only_command(['df', '-P', directory]).splitlines()[-1].split()[0].strip()
            if not re.search(r'^/dev/', filesystem):
                # 'df' reported another directory (not a device) - recurse into it
                devs = self.__learn_directory(filesystem, True)
            else:
                raise Exception("Logic error: failed to create a udev device while 'df -P' {} returns a {}".format(directory, filesystem))

            # log error only for the original directory
            if not recur and not devs:
                perftune_print("Can't get a block device for {} - skipping".format(directory))

            return devs
1336 | ||
1337 | def __get_phys_devices(self, udev_obj): | |
1338 | # if device is a virtual device - the underlying physical devices are going to be its slaves | |
1339 | if re.search(r'virtual', udev_obj.sys_path): | |
20effc67 TL |
1340 | slaves = os.listdir(os.path.join(udev_obj.sys_path, 'slaves')) |
1341 | # If the device is virtual but doesn't have slaves (e.g. as nvm-subsystem virtual devices) handle it | |
1342 | # as a regular device. | |
1343 | if slaves: | |
1344 | return list(itertools.chain.from_iterable([ self.__get_phys_devices(pyudev.Devices.from_device_file(self.__pyudev_ctx, "/dev/{}".format(slave))) for slave in slaves ])) | |
1345 | ||
1346 | # device node is something like /dev/sda1 - we need only the part without /dev/ | |
1347 | return [ re.match(r'/dev/(\S+\d*)', udev_obj.device_node).group(1) ] | |
11fdf7f2 TL |
1348 | |
1349 | def __learn_irqs(self): | |
1350 | disk2irqs = {} | |
1351 | ||
1352 | for devices in list(self.__dir2disks.values()) + [ self.args.devs ]: | |
1353 | for device in devices: | |
1354 | # There could be that some of the given directories are on the same disk. | |
1355 | # There is no need to rediscover IRQs of the disk we've already handled. | |
1356 | if device in disk2irqs.keys(): | |
1357 | continue | |
1358 | ||
9f95a23c | 1359 | udev_obj = pyudev.Devices.from_device_file(self.__pyudev_ctx, "/dev/{}".format(device)) |
11fdf7f2 | 1360 | dev_sys_path = udev_obj.sys_path |
20effc67 TL |
1361 | |
1362 | # If the device is a virtual NVMe device it's sys file name goes as follows: | |
1363 | # /sys/devices/virtual/nvme-subsystem/nvme-subsys0/nvme0n1 | |
1364 | # | |
1365 | # and then there is this symlink: | |
1366 | # /sys/devices/virtual/nvme-subsystem/nvme-subsys0/nvme0n1/device/nvme0 -> ../../../pci0000:85/0000:85:01.0/0000:87:00.0/nvme/nvme0 | |
1367 | # | |
1368 | # So, the "main device" is a "nvme\d+" prefix of the actual device name. | |
1369 | if re.search(r'virtual', udev_obj.sys_path): | |
1370 | m = re.match(r'(nvme\d+)\S*', device) | |
1371 | if m: | |
1372 | dev_sys_path = "{}/device/{}".format(udev_obj.sys_path, m.group(1)) | |
1373 | ||
1374 | split_sys_path = list(pathlib.PurePath(pathlib.Path(dev_sys_path).resolve()).parts) | |
11fdf7f2 TL |
1375 | |
1376 | # first part is always /sys/devices/pciXXX ... | |
1377 | controller_path_parts = split_sys_path[0:4] | |
1378 | ||
1379 | # ...then there is a chain of one or more "domain:bus:device.function" followed by the storage device enumeration crap | |
1380 | # e.g. /sys/devices/pci0000:00/0000:00:1f.2/ata2/host1/target1:0:0/1:0:0:0/block/sda/sda3 or | |
1381 | # /sys/devices/pci0000:00/0000:00:02.0/0000:02:00.0/host6/target6:2:0/6:2:0:0/block/sda/sda1 | |
1382 | # We want only the path till the last BDF including - it contains the IRQs information. | |
1383 | ||
1384 | patt = re.compile("^[0-9ABCDEFabcdef]{4}\:[0-9ABCDEFabcdef]{2}\:[0-9ABCDEFabcdef]{2}\.[0-9ABCDEFabcdef]$") | |
1385 | for split_sys_path_branch in split_sys_path[4:]: | |
1386 | if patt.search(split_sys_path_branch): | |
1387 | controller_path_parts.append(split_sys_path_branch) | |
1388 | else: | |
1389 | break | |
1390 | ||
1391 | controler_path_str = functools.reduce(lambda x, y : os.path.join(x, y), controller_path_parts) | |
1392 | disk2irqs[device] = learn_all_irqs_one(controler_path_str, self.__irqs2procline, 'blkif') | |
1393 | ||
1394 | return disk2irqs | |
1395 | ||
1396 | def __get_feature_file(self, dev_node, path_creator): | |
1397 | """ | |
1398 | Find the closest ancestor with the given feature and return its ('feature file', 'device node') tuple. | |
1399 | ||
1400 | If there isn't such an ancestor - return (None, None) tuple. | |
1401 | ||
1402 | :param dev_node Device node file name, e.g. /dev/sda1 | |
1403 | :param path_creator A functor that creates a feature file name given a device system file name | |
1404 | """ | |
20effc67 TL |
1405 | # Sanity check |
1406 | if dev_node is None or path_creator is None: | |
1407 | return None, None | |
1408 | ||
9f95a23c | 1409 | udev = pyudev.Devices.from_device_file(pyudev.Context(), dev_node) |
11fdf7f2 TL |
1410 | feature_file = path_creator(udev.sys_path) |
1411 | ||
1412 | if os.path.exists(feature_file): | |
1413 | return feature_file, dev_node | |
1414 | elif udev.parent is not None: | |
1415 | return self.__get_feature_file(udev.parent.device_node, path_creator) | |
1416 | else: | |
1417 | return None, None | |
1418 | ||
1419 | def __tune_one_feature(self, dev_node, path_creator, value, tuned_devs_set): | |
1420 | """ | |
1421 | Find the closest ancestor that has the given feature, configure it and | |
1422 | return True. | |
1423 | ||
1424 | If there isn't such ancestor - return False. | |
1425 | ||
1426 | :param dev_node Device node file name, e.g. /dev/sda1 | |
1427 | :param path_creator A functor that creates a feature file name given a device system file name | |
1428 | """ | |
1429 | feature_file, feature_node = self.__get_feature_file(dev_node, path_creator) | |
1430 | ||
1431 | if feature_file is None: | |
1432 | return False | |
1433 | ||
1434 | if feature_node not in tuned_devs_set: | |
1435 | fwriteln_and_log(feature_file, value) | |
1436 | tuned_devs_set.add(feature_node) | |
1437 | ||
1438 | return True | |
1439 | ||
1440 | def __tune_io_scheduler(self, dev_node, io_scheduler): | |
1441 | return self.__tune_one_feature(dev_node, lambda p : os.path.join(p, 'queue', 'scheduler'), io_scheduler, self.__io_scheduler_tuned_devs) | |
1442 | ||
1443 | def __tune_nomerges(self, dev_node): | |
1444 | return self.__tune_one_feature(dev_node, lambda p : os.path.join(p, 'queue', 'nomerges'), self.__nomerges, self.__nomerges_tuned_devs) | |
1445 | ||
20effc67 TL |
1446 | # If write cache configuration is not requested - return True immediately |
1447 | def __tune_write_back_cache(self, dev_node): | |
1448 | if self.__write_cache_config is None: | |
1449 | return True | |
1450 | ||
1451 | return self.__tune_one_feature(dev_node, lambda p : os.path.join(p, 'queue', 'write_cache'), self.__write_cache_config, self.__write_back_cache_tuned_devs) | |
1452 | ||
11fdf7f2 TL |
1453 | def __get_io_scheduler(self, dev_node): |
1454 | """ | |
1455 | Return a supported scheduler that is also present in the required schedulers list (__io_schedulers). | |
1456 | ||
1457 | If there isn't such a supported scheduler - return None. | |
1458 | """ | |
1459 | feature_file, feature_node = self.__get_feature_file(dev_node, lambda p : os.path.join(p, 'queue', 'scheduler')) | |
1460 | ||
1461 | lines = readlines(feature_file) | |
1462 | if not lines: | |
1463 | return None | |
1464 | ||
1465 | # Supported schedulers appear in the config file as a single line as follows: | |
1466 | # | |
1467 | # sched1 [sched2] sched3 | |
1468 | # | |
1469 | # ...with one or more schedulers where currently selected scheduler is the one in brackets. | |
1470 | # | |
1471 | # Return the scheduler with the highest priority among those that are supported for the current device. | |
20effc67 | 1472 | supported_schedulers = frozenset([scheduler.lstrip("[").rstrip("]").rstrip("\n") for scheduler in lines[0].split(" ")]) |
11fdf7f2 TL |
1473 | return next((scheduler for scheduler in self.__io_schedulers if scheduler in supported_schedulers), None) |
1474 | ||
1475 | def __tune_disk(self, device): | |
1476 | dev_node = "/dev/{}".format(device) | |
1477 | io_scheduler = self.__get_io_scheduler(dev_node) | |
1478 | ||
1479 | if not io_scheduler: | |
9f95a23c | 1480 | perftune_print("Not setting I/O Scheduler for {} - required schedulers ({}) are not supported".format(device, list(self.__io_schedulers))) |
11fdf7f2 | 1481 | elif not self.__tune_io_scheduler(dev_node, io_scheduler): |
9f95a23c | 1482 | perftune_print("Not setting I/O Scheduler for {} - feature not present".format(device)) |
11fdf7f2 TL |
1483 | |
1484 | if not self.__tune_nomerges(dev_node): | |
9f95a23c | 1485 | perftune_print("Not setting 'nomerges' for {} - feature not present".format(device)) |
11fdf7f2 | 1486 | |
20effc67 TL |
1487 | if not self.__tune_write_back_cache(dev_node): |
1488 | perftune_print("Not setting 'write_cache' for {} - feature not present".format(device)) | |
1489 | ||
11fdf7f2 TL |
1490 | def __tune_disks(self, disks): |
1491 | for disk in disks: | |
1492 | self.__tune_disk(disk) | |
1493 | ||
################################################################################
class TuneModes(enum.Enum):
    """Top-level components this script knows how to tune."""
    disks = 0
    net = 1
    system = 2

    @staticmethod
    def names():
        """Return the names of all tune modes as a list of strings."""
        return [member.name for member in TuneModes]
1503 | ||
# Command line definition. The epilog is shown verbatim by --help
# (RawDescriptionHelpFormatter keeps its formatting).
argp = argparse.ArgumentParser(description = 'Configure various system parameters in order to improve the seastar application performance.', formatter_class=argparse.RawDescriptionHelpFormatter,
                               epilog=
'''
This script will:

    - Ban relevant IRQs from being moved by irqbalance.
    - Configure various system parameters in /proc/sys.
    - Distribute the IRQs (using SMP affinity configuration) among CPUs according to the configuration mode (see below)
      or an 'irq_cpu_mask' value.

As a result some of the CPUs may be destined to only handle the IRQs and taken out of the CPU set
that should be used to run the seastar application ("compute CPU set").

Modes description:

 sq - set all IRQs of a given NIC to CPU0 and configure RPS
      to spreads NAPIs' handling between other CPUs.

 sq_split - divide all IRQs of a given NIC between CPU0 and its HT siblings and configure RPS
            to spreads NAPIs' handling between other CPUs.

 mq - distribute NIC's IRQs among all CPUs instead of binding
      them all to CPU0. In this mode RPS is always enabled to
      spreads NAPIs' handling between all CPUs.

 If there isn't any mode given script will use a default mode:
    - If number of CPU cores is greater than 16, allocate a single IRQ CPU core for each 16 CPU cores in 'cpu_mask'.
      IRQ cores are going to be allocated evenly on available NUMA nodes according to 'cpu_mask' value.
    - If number of physical CPU cores per Rx HW queue is greater than 4 and less than 16 - use the 'sq-split' mode.
    - Otherwise, if number of hyper-threads per Rx HW queue is greater than 4 - use the 'sq' mode.
    - Otherwise use the 'mq' mode.

Default values:

 --nic NIC - default: eth0
 --cpu-mask MASK - default: all available cores mask
 --tune-clock - default: false
''')
# IRQ distribution selection: either a legacy named mode or an explicit mask.
argp.add_argument('--mode', choices=PerfTunerBase.SupportedModes.names(), help='configuration mode (deprecated, use --irq-cpu-mask instead)')
argp.add_argument('--nic', action='append', help='network interface name(s), by default uses \'eth0\' (may appear more than once)', dest='nics', default=[])
argp.add_argument('--tune-clock', action='store_true', help='Force tuning of the system clocksource')
# Query-only flags: print the computed masks instead of tuning.
argp.add_argument('--get-cpu-mask', action='store_true', help="print the CPU mask to be used for compute")
argp.add_argument('--get-cpu-mask-quiet', action='store_true', help="print the CPU mask to be used for compute, print the zero CPU set if that's what it turns out to be")
argp.add_argument('--get-irq-cpu-mask', action='store_true', help="print the CPU mask to be used for IRQs binding")
argp.add_argument('--verbose', action='store_true', help="be more verbose about operations and their result")
# What to tune and on which CPUs/dirs/devices.
argp.add_argument('--tune', choices=TuneModes.names(), help="components to configure (may be given more than once)", action='append', default=[])
argp.add_argument('--cpu-mask', help="mask of cores to use, by default use all available cores", metavar='MASK')
argp.add_argument('--irq-cpu-mask', help="mask of cores to use for IRQs binding", metavar='MASK')
argp.add_argument('--dir', help="directory to optimize (may appear more than once)", action='append', dest='dirs', default=[])
argp.add_argument('--dev', help="device to optimize (may appear more than once), e.g. sda1", action='append', dest='devs', default=[])
# Configuration file handling and misc knobs.
argp.add_argument('--options-file', help="configuration YAML file")
argp.add_argument('--dump-options-file', action='store_true', help="Print the configuration YAML file containing the current configuration")
argp.add_argument('--dry-run', action='store_true', help="Don't take any action, just recommend what to do.")
argp.add_argument('--write-back-cache', help="Enable/Disable \'write back\' write cache mode.", dest="set_write_back")
argp.add_argument('--arfs', help="Enable/Disable aRFS", dest="enable_arfs")
argp.add_argument('--num-rx-queues', help="Set a given number of Rx queues", type=int)
argp.add_argument('--irq-core-auto-detection-ratio', help="Use a given ratio for IRQ mask auto-detection. For "
                                                          "instance, if 8 is given and auto-detection is requested, a "
                                                          "single IRQ CPU core is going to be allocated for every 8 "
                                                          "CPU cores out of available according to a 'cpu_mask' value."
                                                          "Default is 16",
                  type=int, default=16, dest='cores_per_irq_core')
9f95a23c TL |
1566 | |
def parse_cpu_mask_from_yaml(y, field_name, fname):
    """
    Validate and return the CPU mask stored under field_name in the parsed YAML dict.

    A valid mask is one or more comma-separated 32-bit hex words, where inner
    words may be empty (e.g. "0xff" or "0x1,,0x2").

    :param y: parsed YAML dictionary
    :param field_name: key to look up in y
    :param fname: YAML file name (used in the error message only)
    :raises Exception: when the value does not look like a CPU mask
    """
    hex_word = '0x[0-9a-fA-F]{1,8}'
    mask_pattern = re.compile('^{}((,({})?)*,{})*$'.format(hex_word, hex_word, hex_word))

    value = str(y[field_name])
    if not mask_pattern.match(value):
        raise Exception("Bad '{}' value in {}: {}".format(field_name, fname, value))

    return y[field_name]
11fdf7f2 | 1575 | |
20effc67 TL |
def extend_and_unique(orig_list, iterable):
    """
    Extend orig_list with the given items (in place) and return a new de-duplicated list.

    Unlike the previous list(set(...)) implementation, the returned list preserves
    the first-seen order of the items, which makes the result - and anything
    derived from it, e.g. the --dump-options-file output - deterministic.

    :param orig_list: the list to extend (modified in place, as before)
    :param iterable: list of items to append
    :return: a new list with duplicates removed, first-seen order preserved
    """
    assert(isinstance(orig_list, list))
    assert(isinstance(iterable, list))
    orig_list.extend(iterable)
    # dict preserves insertion order - use it for a stable de-duplication
    return list(dict.fromkeys(orig_list))
1584 | ||
1e59de90 TL |
def parse_tri_state_arg(value, arg_name):
    """
    Parse a tri-state (true/false/unset) command line or YAML argument.

    :param value: the raw value, or None if the argument wasn't given
    :param arg_name: argument name(s) to mention in the error message
    :return: 1 for a truthy value, 0 for a falsy value, None when value is None
    Exits the program with an error message on any other value.
    """
    if value is None:
        return None

    # Same truth table as distutils.util.strtobool(), which this code used to
    # rely on - distutils is deprecated and removed in Python 3.12 (PEP 632).
    # The narrow AttributeError handler replaces the old bare 'except:' that
    # would also have swallowed KeyboardInterrupt/SystemExit.
    try:
        lowered = value.lower()
    except AttributeError:
        sys.exit("Invalid {} value: should be boolean but given: {}".format(arg_name, value))

    if lowered in ('y', 'yes', 't', 'true', 'on', '1'):
        return 1
    if lowered in ('n', 'no', 'f', 'false', 'off', '0'):
        return 0
    sys.exit("Invalid {} value: should be boolean but given: {}".format(arg_name, value))
1593 | ||
11fdf7f2 TL |
def parse_options_file(prog_args):
    """
    Merge options from the YAML file given via --options-file into prog_args.

    Command-line values take precedence for scalar options (mode, masks,
    tune_clock); list-valued options (nic/tune/dir/dev) are merged with the
    command-line ones and de-duplicated. Mutates prog_args in place.
    """
    if not prog_args.options_file:
        return

    y = yaml.safe_load(open(prog_args.options_file))
    # An empty YAML file parses to None - nothing to merge
    if y is None:
        return

    if 'mode' in y and not prog_args.mode:
        if not y['mode'] in PerfTunerBase.SupportedModes.names():
            raise Exception("Bad 'mode' value in {}: {}".format(prog_args.options_file, y['mode']))
        prog_args.mode = y['mode']

    if 'nic' in y:
        # Multiple nics was supported by commit a2fc9d72c31b97840bc75ae49dbd6f4b6d394e25
        # `nic' option dumped to config file will be list after this change, but the `nic'
        # option in old config file is still string, which was generated before this change.
        # So here convert the string option to list.
        if not isinstance(y['nic'], list):
            y['nic'] = [y['nic']]
        prog_args.nics = extend_and_unique(prog_args.nics, y['nic'])

    if 'tune_clock' in y and not prog_args.tune_clock:
        prog_args.tune_clock= y['tune_clock']

    if 'tune' in y:
        # Every requested component must be a known TuneModes name
        if set(y['tune']) <= set(TuneModes.names()):
            prog_args.tune = extend_and_unique(prog_args.tune, y['tune'])
        else:
            raise Exception("Bad 'tune' value in {}: {}".format(prog_args.options_file, y['tune']))

    if 'cpu_mask' in y and not prog_args.cpu_mask:
        prog_args.cpu_mask = parse_cpu_mask_from_yaml(y, 'cpu_mask', prog_args.options_file)

    if 'irq_cpu_mask' in y and not prog_args.irq_cpu_mask:
        prog_args.irq_cpu_mask = parse_cpu_mask_from_yaml(y, 'irq_cpu_mask', prog_args.options_file)

    if 'dir' in y:
        prog_args.dirs = extend_and_unique(prog_args.dirs, y['dir'])

    if 'dev' in y:
        prog_args.devs = extend_and_unique(prog_args.devs, y['dev'])

    # Tri-state booleans: absent keys leave the command-line value untouched
    if 'write_back_cache' in y:
        prog_args.set_write_back = distutils.util.strtobool("{}".format(y['write_back_cache']))

    if 'arfs' in y:
        prog_args.enable_arfs = distutils.util.strtobool("{}".format(y['arfs']))

    if 'num_rx_queues' in y:
        prog_args.num_rx_queues = int(y['num_rx_queues'])

    if 'irq_core_auto_detection_ratio' in y:
        prog_args.cores_per_irq_core = int(y['irq_core_auto_detection_ratio'])
1649 | ||
11fdf7f2 TL |
def dump_config(prog_args):
    """
    Print the effective configuration as a YAML document in the same format
    that --options-file accepts.
    """
    out = {}

    # A legacy 'mode' is dumped as the equivalent explicit IRQs CPU mask
    if prog_args.mode:
        assert prog_args.cpu_mask, "cpu_mask has to always be set. Something is terribly wrong (a bug in perftune.py?)"
        mode = PerfTunerBase.SupportedModes[prog_args.mode]
        out['irq_cpu_mask'] = PerfTunerBase.irqs_cpu_mask_for_mode(mode, prog_args.cpu_mask)

    if prog_args.nics:
        out['nic'] = list(set(prog_args.nics))

    if prog_args.tune_clock:
        out['tune_clock'] = prog_args.tune_clock

    if prog_args.tune:
        out['tune'] = list(set(prog_args.tune))

    if prog_args.cpu_mask:
        out['cpu_mask'] = prog_args.cpu_mask

    if prog_args.irq_cpu_mask:
        out['irq_cpu_mask'] = prog_args.irq_cpu_mask

    if prog_args.dirs:
        out['dir'] = list(set(prog_args.dirs))

    if prog_args.devs:
        out['dev'] = list(set(prog_args.devs))

    # Tri-state options are dumped only when explicitly set
    if prog_args.set_write_back is not None:
        out['write_back_cache'] = prog_args.set_write_back

    if prog_args.enable_arfs is not None:
        out['arfs'] = prog_args.enable_arfs

    if prog_args.num_rx_queues is not None:
        out['num_rx_queues'] = f"{prog_args.num_rx_queues}"

    out['irq_core_auto_detection_ratio'] = prog_args.cores_per_irq_core

    perftune_print(yaml.dump(out, default_flow_style=False))
11fdf7f2 TL |
1691 | ################################################################################ |
1692 | ||
# Script entry sequence: parse args, merge the options file, validate, then
# either print the requested mask(s) or actually tune the system.
args = argp.parse_args()

# Normalize the tri-state string arguments to 1/0/None (exits on bad values)
args.set_write_back = parse_tri_state_arg(args.set_write_back, "--write-back-cache/write_back_cache")
args.enable_arfs = parse_tri_state_arg(args.enable_arfs, "--arfs/arfs")

# In dry-run mode commands are printed instead of executed (see run_one_command)
dry_run_mode = args.dry_run
parse_options_file(args)

# if nothing needs to be configured - quit
if not args.tune:
    sys.exit("ERROR: At least one tune mode MUST be given.")

# There must be either 'mode' or an explicit 'irq_cpu_mask' given - not both
if args.mode and args.irq_cpu_mask:
    sys.exit("ERROR: Provide either tune mode or IRQs CPU mask - not both.")

# Sanity check the IRQ auto-detection ratio
if args.cores_per_irq_core < PerfTunerBase.min_cores_per_irq_core():
    sys.exit(f"ERROR: irq_core_auto_detection_ratio value must be greater or equal than "
             f"{PerfTunerBase.min_cores_per_irq_core()}")

# set default values #####################
if not args.nics:
    args.nics = ['eth0']

if not args.cpu_mask:
    args.cpu_mask = run_hwloc_calc(['all'])
##########################################

# Sanity: irq_cpu_mask should be a subset of cpu_mask
if args.irq_cpu_mask and run_hwloc_calc([args.cpu_mask]) != run_hwloc_calc([args.cpu_mask, args.irq_cpu_mask]):
    sys.exit("ERROR: IRQ CPU mask({}) must be a subset of CPU mask({})".format(args.irq_cpu_mask, args.cpu_mask))

# --dump-options-file only prints the effective configuration and exits
if args.dump_options_file:
    dump_config(args)
    sys.exit(0)

try:
    # Instantiate one tuner per requested component
    tuners = []

    if TuneModes.disks.name in args.tune:
        tuners.append(DiskPerfTuner(args))

    if TuneModes.net.name in args.tune:
        tuners.append(NetPerfTuner(args))

    if TuneModes.system.name in args.tune:
        tuners.append(SystemPerfTuner(args))

    if args.get_cpu_mask or args.get_cpu_mask_quiet:
        # Print the compute mask from the first tuner - it's going to be the same in all of them
        perftune_print(tuners[0].compute_cpu_mask)
    elif args.get_irq_cpu_mask:
        perftune_print(tuners[0].irqs_cpu_mask)
    else:
        # Tune the system: first ban our IRQs from irqbalance, then run each tuner
        restart_irqbalance(itertools.chain.from_iterable([ tuner.irqs for tuner in tuners ]))

        for tuner in tuners:
            tuner.tune()
except PerfTunerBase.CPUMaskIsZeroException as e:
    # Print a zero CPU set if --get-cpu-mask-quiet was requested.
    if args.get_cpu_mask_quiet:
        perftune_print("0x0")
    else:
        sys.exit("ERROR: {}. Your system can't be tuned until the issue is fixed.".format(e))
except Exception as e:
    sys.exit("ERROR: {}. Your system can't be tuned until the issue is fixed.".format(e))
1762 |