]> git.proxmox.com Git - mirror_ovs.git/blob - lib/dpdk.c
ipf: Avoid accessing to a freed rp.
[mirror_ovs.git] / lib / dpdk.c
1 /*
2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18 #include "dpdk.h"
19
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <getopt.h>
24
25 #include <rte_cpuflags.h>
26 #include <rte_errno.h>
27 #include <rte_log.h>
28 #include <rte_memzone.h>
29 #include <rte_version.h>
30
31 #include "dirs.h"
32 #include "fatal-signal.h"
33 #include "netdev-dpdk.h"
34 #include "netdev-offload-provider.h"
35 #include "openvswitch/dynamic-string.h"
36 #include "openvswitch/vlog.h"
37 #include "ovs-numa.h"
38 #include "smap.h"
39 #include "svec.h"
40 #include "unixctl.h"
41 #include "util.h"
42 #include "vswitch-idl.h"
43
VLOG_DEFINE_THIS_MODULE(dpdk);

/* Stream used for DPDK log redirection. */
static FILE *log_stream = NULL;

/* Location of vhost-user sockets. */
static char *vhost_sock_dir = NULL;

/* Status of vHost IOMMU support. */
static bool vhost_iommu_enabled = false;

/* Status of vHost POSTCOPY support. */
static bool vhost_postcopy_enabled = false;

/* Indicates successful initialization of DPDK. */
static bool dpdk_initialized = false;

/* Status of per port memory support. */
static bool per_port_memory = false;
55
/* Looks up 'flag' in 'ovs_other_config' and stores a newly allocated string
 * in '*new_val'.  The string comes from xstrdup(), so the caller is
 * responsible for freeing it.
 *
 * The configured value is used only if it is present and at most 'size'
 * characters long.  Otherwise (key absent, value too long, or a negative
 * 'size') a copy of 'default_val' is stored instead, and the log message says
 * which of the two cases applied.
 *
 * Returns 1 if the user-provided value was used, 0 if the default was. */
static int
process_vhost_flags(char *flag, const char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    /* Check the sign of 'size' before converting it: comparing a negative
     * int against strlen()'s size_t result would otherwise accept any
     * length. */
    if (val && size >= 0 && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    if (val) {
        /* A value was configured but rejected; do not claim that none was
         * provided. */
        VLOG_INFO("User-provided %s is longer than %d characters - "
                  "defaulting to %s", flag, size, default_val);
    } else {
        VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    }
    *new_val = xstrdup(default_val);

    return 0;
}
80
81 static bool
82 args_contains(const struct svec *args, const char *value)
83 {
84 const char *arg;
85 size_t i;
86
87 /* We can't just use 'svec_contains' because args are not sorted. */
88 SVEC_FOR_EACH (i, arg, args) {
89 if (!strcmp(arg, value)) {
90 return true;
91 }
92 }
93 return false;
94 }
95
/* Appends to 'args' the EAL options that map one-to-one onto keys of
 * 'ovs_other_config' (the "flat", non mutually exclusive options).
 *
 * An option that already appears in 'args' is not added again; a warning is
 * logged instead. */
static void
construct_dpdk_options(const struct smap *ovs_other_config, struct svec *args)
{
    struct dpdk_options_map {
        const char *ovs_configuration;  /* Key looked up in the config. */
        const char *dpdk_option;        /* Option string added to 'args'. */
        bool default_enabled;           /* Use 'default_value' if unset? */
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask",   "-c",             false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir",     false, NULL},
        {"dpdk-socket-limit", "--socket-limit", false, NULL},
    };
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(opts); idx++) {
        const struct dpdk_options_map *opt = &opts[idx];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            /* Neither configured nor defaulted: nothing to add. */
            continue;
        }

        if (args_contains(args, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk-extra config", opt->dpdk_option);
            continue;
        }

        svec_add(args, opt->dpdk_option);
        svec_add(args, value);
    }
}
131
132 static char *
133 construct_dpdk_socket_mem(void)
134 {
135 const char *def_value = "1024";
136 int numa, numa_nodes = ovs_numa_get_n_numas();
137 struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
138
139 if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) {
140 numa_nodes = 1;
141 }
142
143 ds_put_cstr(&dpdk_socket_mem, def_value);
144 for (numa = 1; numa < numa_nodes; ++numa) {
145 ds_put_format(&dpdk_socket_mem, ",%s", def_value);
146 }
147
148 return ds_cstr(&dpdk_socket_mem);
149 }
150
/* Capacity of the per-category option arrays below. */
#define MAX_DPDK_EXCL_OPTS 10

/* Appends to 'args' the EAL options that belong to mutually exclusive
 * categories: for each category at most one option/value pair is added.
 *
 * Within a category, the last key in table order that has a non-empty value
 * in 'ovs_other_config' is used.  If more than one key is set an error is
 * logged, but processing continues with that last one.  If none is set, the
 * category's default is used when 'default_option' is nonzero; otherwise the
 * category is skipped.  An option already present in 'args' is not added
 * again; a warning is logged instead. */
static void
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             struct svec *args)
{
    char *default_dpdk_socket_mem = construct_dpdk_socket_mem();

    struct dpdk_exclusive_options_map {
        const char *category;
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;     /* Index of the default; 0 means "none". */
    } excl_opts[] = {
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m",             "--socket-mem",    NULL,},
         default_dpdk_socket_mem, 1
        },
    };
    int i;

    for (i = 0; i < ARRAY_SIZE(excl_opts); ++i) {
        struct dpdk_exclusive_options_map *group = &excl_opts[i];
        const char *chosen_value = NULL;
        const char *eal_option;
        int chosen = -1;
        int n_set = 0;
        int j;

        /* Count the configured keys, remembering the last one seen. */
        for (j = 0; j < MAX_DPDK_EXCL_OPTS && group->ovs_dpdk_options[j];
             ++j) {
            const char *value = smap_get(ovs_other_config,
                                         group->ovs_dpdk_options[j]);

            if (value && value[0] != '\0') {
                n_set++;
                chosen = j;
                chosen_value = value;
            }
        }

        if (n_set == 0) {
            if (!group->default_option) {
                continue;
            }
            chosen = group->default_option;
            chosen_value = group->default_value;
        } else if (n_set > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     group->category);
        }

        eal_option = group->eal_dpdk_options[chosen];
        if (args_contains(args, eal_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk-extra config", eal_option);
        } else {
            svec_add(args, eal_option);
            svec_add(args, chosen_value);
        }
    }

    free(default_dpdk_socket_mem);
}
216
/* Fills 'args' with the EAL arguments derived from 'ovs_other_config'.
 *
 * The words of the "dpdk-extra" value, if any, are added first, so that the
 * two helpers called afterwards find them in 'args' and skip any option
 * that is already present. */
static void
construct_dpdk_args(const struct smap *ovs_other_config, struct svec *args)
{
    const char *dpdk_extra = smap_get(ovs_other_config, "dpdk-extra");

    if (dpdk_extra != NULL) {
        svec_parse_words(args, dpdk_extra);
    }

    construct_dpdk_options(ovs_other_config, args);
    construct_dpdk_mutex_options(ovs_other_config, args);
}
229
230 static ssize_t
231 dpdk_log_write(void *c OVS_UNUSED, const char *buf, size_t size)
232 {
233 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(600, 600);
234 static struct vlog_rate_limit dbg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
235
236 switch (rte_log_cur_msg_loglevel()) {
237 case RTE_LOG_DEBUG:
238 VLOG_DBG_RL(&dbg_rl, "%.*s", (int) size, buf);
239 break;
240 case RTE_LOG_INFO:
241 case RTE_LOG_NOTICE:
242 VLOG_INFO_RL(&rl, "%.*s", (int) size, buf);
243 break;
244 case RTE_LOG_WARNING:
245 VLOG_WARN_RL(&rl, "%.*s", (int) size, buf);
246 break;
247 case RTE_LOG_ERR:
248 VLOG_ERR_RL(&rl, "%.*s", (int) size, buf);
249 break;
250 case RTE_LOG_CRIT:
251 case RTE_LOG_ALERT:
252 case RTE_LOG_EMERG:
253 VLOG_EMER("%.*s", (int) size, buf);
254 break;
255 default:
256 OVS_NOT_REACHED();
257 }
258
259 return size;
260 }
261
262 static cookie_io_functions_t dpdk_log_func = {
263 .write = dpdk_log_write,
264 };
265
266 static void
267 dpdk_unixctl_mem_stream(struct unixctl_conn *conn, int argc OVS_UNUSED,
268 const char *argv[] OVS_UNUSED, void *aux)
269 {
270 void (*callback)(FILE *) = aux;
271 char *response = NULL;
272 FILE *stream;
273 size_t size;
274
275 stream = open_memstream(&response, &size);
276 if (!stream) {
277 response = xasprintf("Unable to open memstream: %s.",
278 ovs_strerror(errno));
279 unixctl_command_reply_error(conn, response);
280 goto out;
281 }
282
283 callback(stream);
284 fclose(stream);
285 unixctl_command_reply(conn, response);
286 out:
287 free(response);
288 }
289
290 static int
291 dpdk_parse_log_level(const char *s)
292 {
293 static const char * const levels[] = {
294 [RTE_LOG_EMERG] = "emergency",
295 [RTE_LOG_ALERT] = "alert",
296 [RTE_LOG_CRIT] = "critical",
297 [RTE_LOG_ERR] = "error",
298 [RTE_LOG_WARNING] = "warning",
299 [RTE_LOG_NOTICE] = "notice",
300 [RTE_LOG_INFO] = "info",
301 [RTE_LOG_DEBUG] = "debug",
302 };
303 int i;
304
305 for (i = 1; i < ARRAY_SIZE(levels); ++i) {
306 if (!strcmp(s, levels[i])) {
307 return i;
308 }
309 }
310 return -1;
311 }
312
313 static void
314 dpdk_unixctl_log_set(struct unixctl_conn *conn, int argc, const char *argv[],
315 void *aux OVS_UNUSED)
316 {
317 int i;
318
319 /* With no argument, set all components level to 'debug'. */
320 if (argc == 1) {
321 rte_log_set_level_pattern("*", RTE_LOG_DEBUG);
322 }
323 for (i = 1; i < argc; i++) {
324 char *err_msg = NULL;
325 char *level_string;
326 char *pattern;
327 char *s;
328 int level;
329
330 s = xstrdup(argv[i]);
331 level_string = strchr(s, ':');
332 if (level_string == NULL) {
333 pattern = "*";
334 level_string = s;
335 } else {
336 pattern = s;
337 level_string[0] = '\0';
338 level_string++;
339 }
340
341 level = dpdk_parse_log_level(level_string);
342 if (level == -1) {
343 err_msg = xasprintf("invalid log level: '%s'", level_string);
344 } else if (rte_log_set_level_pattern(pattern, level) < 0) {
345 err_msg = xasprintf("cannot set log level for '%s'", argv[i]);
346 }
347
348 if (err_msg) {
349 unixctl_command_reply_error(conn, err_msg);
350 free(err_msg);
351 free(s);
352 return;
353 }
354 free(s);
355 }
356 unixctl_command_reply(conn, NULL);
357 }
358
359 static bool
360 dpdk_init__(const struct smap *ovs_other_config)
361 {
362 char *sock_dir_subcomponent;
363 char **argv = NULL;
364 int result;
365 bool auto_determine = true;
366 int err = 0;
367 struct ovs_numa_dump *affinity = NULL;
368 struct svec args = SVEC_EMPTY_INITIALIZER;
369
370 log_stream = fopencookie(NULL, "w+", dpdk_log_func);
371 if (log_stream == NULL) {
372 VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno));
373 } else {
374 setbuf(log_stream, NULL);
375 rte_openlog_stream(log_stream);
376 }
377
378 if (process_vhost_flags("vhost-sock-dir", ovs_rundir(),
379 NAME_MAX, ovs_other_config,
380 &sock_dir_subcomponent)) {
381 struct stat s;
382 if (!strstr(sock_dir_subcomponent, "..")) {
383 vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(),
384 sock_dir_subcomponent);
385
386 err = stat(vhost_sock_dir, &s);
387 if (err) {
388 VLOG_ERR("vhost-user sock directory '%s' does not exist.",
389 vhost_sock_dir);
390 }
391 } else {
392 vhost_sock_dir = xstrdup(ovs_rundir());
393 VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid"
394 "characters '..' - using %s instead.",
395 ovs_rundir(), sock_dir_subcomponent, ovs_rundir());
396 }
397 free(sock_dir_subcomponent);
398 } else {
399 vhost_sock_dir = sock_dir_subcomponent;
400 }
401
402 vhost_iommu_enabled = smap_get_bool(ovs_other_config,
403 "vhost-iommu-support", false);
404 VLOG_INFO("IOMMU support for vhost-user-client %s.",
405 vhost_iommu_enabled ? "enabled" : "disabled");
406
407 vhost_postcopy_enabled = smap_get_bool(ovs_other_config,
408 "vhost-postcopy-support", false);
409 if (vhost_postcopy_enabled && memory_locked()) {
410 VLOG_WARN("vhost-postcopy-support and mlockall are not compatible.");
411 vhost_postcopy_enabled = false;
412 }
413 VLOG_INFO("POSTCOPY support for vhost-user-client %s.",
414 vhost_postcopy_enabled ? "enabled" : "disabled");
415
416 per_port_memory = smap_get_bool(ovs_other_config,
417 "per-port-memory", false);
418 VLOG_INFO("Per port memory for DPDK devices %s.",
419 per_port_memory ? "enabled" : "disabled");
420
421 svec_add(&args, ovs_get_program_name());
422 construct_dpdk_args(ovs_other_config, &args);
423
424 if (!args_contains(&args, "--legacy-mem")
425 && !args_contains(&args, "--socket-limit")) {
426 const char *arg;
427 size_t i;
428
429 SVEC_FOR_EACH (i, arg, &args) {
430 if (!strcmp(arg, "--socket-mem")) {
431 break;
432 }
433 }
434 if (i < args.n - 1) {
435 svec_add(&args, "--socket-limit");
436 svec_add(&args, args.names[i + 1]);
437 }
438 }
439
440 if (args_contains(&args, "-c") || args_contains(&args, "-l")) {
441 auto_determine = false;
442 }
443
444 /**
445 * NOTE: This is an unsophisticated mechanism for determining the DPDK
446 * main core.
447 */
448 if (auto_determine) {
449 const struct ovs_numa_info_core *core;
450 int cpu = 0;
451
452 /* Get the main thread affinity */
453 affinity = ovs_numa_thread_getaffinity_dump();
454 if (affinity) {
455 cpu = INT_MAX;
456 FOR_EACH_CORE_ON_DUMP (core, affinity) {
457 if (cpu > core->core_id) {
458 cpu = core->core_id;
459 }
460 }
461 } else {
462 /* User did not set dpdk-lcore-mask and unable to get current
463 * thread affintity - default to core #0 */
464 VLOG_ERR("Thread getaffinity failed. Using core #0");
465 }
466 svec_add(&args, "-l");
467 svec_add_nocopy(&args, xasprintf("%d", cpu));
468 }
469
470 svec_terminate(&args);
471
472 optind = 1;
473
474 if (VLOG_IS_INFO_ENABLED()) {
475 struct ds eal_args = DS_EMPTY_INITIALIZER;
476 char *joined_args = svec_join(&args, " ", ".");
477
478 ds_put_format(&eal_args, "EAL ARGS: %s", joined_args);
479 VLOG_INFO("%s", ds_cstr_ro(&eal_args));
480 ds_destroy(&eal_args);
481 free(joined_args);
482 }
483
484 /* Copy because 'rte_eal_init' will change the argv, i.e. it will remove
485 * some arguments from it. '+1' to copy the terminating NULL. */
486 argv = xmemdup(args.names, (args.n + 1) * sizeof args.names[0]);
487
488 /* Make sure things are initialized ... */
489 result = rte_eal_init(args.n, argv);
490
491 free(argv);
492 svec_destroy(&args);
493
494 /* Set the main thread affinity back to pre rte_eal_init() value */
495 if (affinity) {
496 ovs_numa_thread_setaffinity_dump(affinity);
497 ovs_numa_dump_destroy(affinity);
498 }
499
500 if (result < 0) {
501 VLOG_EMER("Unable to initialize DPDK: %s", ovs_strerror(rte_errno));
502 return false;
503 }
504
505 if (VLOG_IS_DBG_ENABLED()) {
506 size_t size;
507 char *response = NULL;
508 FILE *stream = open_memstream(&response, &size);
509
510 if (stream) {
511 fprintf(stream, "rte_memzone_dump:\n");
512 rte_memzone_dump(stream);
513 fprintf(stream, "rte_log_dump:\n");
514 rte_log_dump(stream);
515 fclose(stream);
516 VLOG_DBG("%s", response);
517 free(response);
518 } else {
519 VLOG_DBG("Could not dump memzone and log levels. "
520 "Unable to open memstream: %s.", ovs_strerror(errno));
521 }
522 }
523
524 unixctl_command_register("dpdk/log-list", "", 0, 0,
525 dpdk_unixctl_mem_stream, rte_log_dump);
526 unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0,
527 INT_MAX, dpdk_unixctl_log_set, NULL);
528
529 /* We are called from the main thread here */
530 RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
531
532 /* Finally, register the dpdk classes */
533 netdev_dpdk_register();
534 netdev_register_flow_api_provider(&netdev_offload_dpdk);
535 return true;
536 }
537
538 void
539 dpdk_init(const struct smap *ovs_other_config)
540 {
541 static bool enabled = false;
542
543 if (enabled || !ovs_other_config) {
544 return;
545 }
546
547 const char *dpdk_init_val = smap_get_def(ovs_other_config, "dpdk-init",
548 "false");
549
550 bool try_only = !strcasecmp(dpdk_init_val, "try");
551 if (!strcasecmp(dpdk_init_val, "true") || try_only) {
552 static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
553
554 if (ovsthread_once_start(&once_enable)) {
555 VLOG_INFO("Using %s", rte_version());
556 VLOG_INFO("DPDK Enabled - initializing...");
557 enabled = dpdk_init__(ovs_other_config);
558 if (enabled) {
559 VLOG_INFO("DPDK Enabled - initialized");
560 } else if (!try_only) {
561 ovs_abort(rte_errno, "Cannot init EAL");
562 }
563 ovsthread_once_done(&once_enable);
564 } else {
565 VLOG_ERR_ONCE("DPDK Initialization Failed.");
566 }
567 } else {
568 VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable");
569 }
570 dpdk_initialized = enabled;
571 }
572
573 const char *
574 dpdk_get_vhost_sock_dir(void)
575 {
576 return vhost_sock_dir;
577 }
578
579 bool
580 dpdk_vhost_iommu_enabled(void)
581 {
582 return vhost_iommu_enabled;
583 }
584
585 bool
586 dpdk_vhost_postcopy_enabled(void)
587 {
588 return vhost_postcopy_enabled;
589 }
590
591 bool
592 dpdk_per_port_memory(void)
593 {
594 return per_port_memory;
595 }
596
597 bool
598 dpdk_available(void)
599 {
600 return dpdk_initialized;
601 }
602
603 void
604 dpdk_set_lcore_id(unsigned cpu)
605 {
606 /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
607 ovs_assert(cpu != NON_PMD_CORE_ID);
608 RTE_PER_LCORE(_lcore_id) = cpu;
609 }
610
/* Writes the string returned by rte_version(), followed by a newline, to
 * standard output. */
void
print_dpdk_version(void)
{
    const char *version = rte_version();

    puts(version);
}
616
617 #define CHECK_CPU_FEATURE(feature, name_str, RTE_CPUFLAG) \
618 do { \
619 if (strncmp(feature, name_str, strlen(name_str)) == 0) { \
620 int has_isa = rte_cpu_get_flag_enabled(RTE_CPUFLAG); \
621 VLOG_DBG("CPU flag %s, available %s\n", name_str, \
622 has_isa ? "yes" : "no"); \
623 return true; \
624 } \
625 } while (0)
626
627 bool
628 dpdk_get_cpu_has_isa(const char *arch, const char *feature)
629 {
630 /* Ensure Arch is x86_64. */
631 if (strncmp(arch, "x86_64", 6) != 0) {
632 return false;
633 }
634
635 #if __x86_64__
636 /* CPU flags only defined for the architecture that support it. */
637 CHECK_CPU_FEATURE(feature, "avx512f", RTE_CPUFLAG_AVX512F);
638 CHECK_CPU_FEATURE(feature, "bmi2", RTE_CPUFLAG_BMI2);
639 #endif
640
641 VLOG_WARN("Unknown CPU arch,feature: %s,%s. Returning not supported.\n",
642 arch, feature);
643 return false;
644 }
645
646 void
647 dpdk_status(const struct ovsrec_open_vswitch *cfg)
648 {
649 if (cfg) {
650 ovsrec_open_vswitch_set_dpdk_initialized(cfg, dpdk_initialized);
651 ovsrec_open_vswitch_set_dpdk_version(cfg, rte_version());
652 }
653 }