]> git.proxmox.com Git - mirror_ovs.git/blame - lib/dpdk.c
ofproto-dpif-upcall: Echo HASH attribute back to datapath.
[mirror_ovs.git] / lib / dpdk.c
CommitLineData
01961bbd 1/*
5575908b 2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
01961bbd
DDP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpdk.h"
19
736ca516 20#include <stdio.h>
01961bbd
DDP
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <getopt.h>
24
d7e2509e 25#include <rte_errno.h>
736ca516 26#include <rte_log.h>
01961bbd 27#include <rte_memzone.h>
40c23a57 28#include <rte_version.h>
a0cbc627
CL
29#ifdef DPDK_PDUMP
30#include <rte_mempool.h>
31#include <rte_pdump.h>
32#endif
01961bbd
DDP
33
34#include "dirs.h"
a0cbc627 35#include "fatal-signal.h"
01961bbd 36#include "netdev-dpdk.h"
b6cabb8f 37#include "netdev-offload-provider.h"
01961bbd
DDP
38#include "openvswitch/dynamic-string.h"
39#include "openvswitch/vlog.h"
7189d54c 40#include "ovs-numa.h"
01961bbd 41#include "smap.h"
68c00e3e 42#include "svec.h"
30e834dc 43#include "util.h"
3e52fa56 44#include "vswitch-idl.h"
01961bbd
DDP
45
46VLOG_DEFINE_THIS_MODULE(dpdk);
47
736ca516
IM
/* Stream for DPDK log redirection. */
static FILE *log_stream = NULL;

/* Location of vhost-user sockets. */
static char *vhost_sock_dir = NULL;

/* Status of vHost IOMMU support. */
static bool vhost_iommu_enabled = false;

/* Status of vHost POSTCOPY support. */
static bool vhost_postcopy_enabled = false;

/* Indicates successful initialization of DPDK. */
static bool dpdk_initialized = false;

/* Status of per port memory support. */
static bool per_port_memory = false;
01961bbd
DDP
57
/* Looks up 'flag' in 'ovs_other_config' and stores in '*new_val' a copy of
 * the value that should be used for it.
 *
 * The configured value is used when it is present and at most 'size'
 * characters long; otherwise a copy of 'default_val' is stored instead.  The
 * string stored in '*new_val' is produced by xstrdup(), so the caller is
 * responsible for freeing it.
 *
 * Returns 1 if the configured value was used, 0 if the default was used. */
static int
process_vhost_flags(const char *flag, const char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    /* Compare lengths as size_t explicitly so that a negative 'size' is
     * rejected instead of being implicitly converted to a huge limit. */
    if (val && size >= 0 && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    if (val) {
        /* A value was configured but is too long: report that rather than
         * claiming that nothing was provided. */
        VLOG_WARN("User-provided %s is longer than %d characters - "
                  "defaulting to %s", flag, size, default_val);
    } else {
        VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    }
    *new_val = xstrdup(default_val);

    return 0;
}
82
01961bbd 83static bool
68c00e3e 84args_contains(const struct svec *args, const char *value)
01961bbd 85{
68c00e3e
IM
86 const char *arg;
87 size_t i;
88
89 /* We can't just use 'svec_contains' because args are not sorted. */
90 SVEC_FOR_EACH (i, arg, args) {
91 if (!strcmp(arg, value)) {
01961bbd 92 return true;
68c00e3e 93 }
01961bbd
DDP
94 }
95 return false;
96}
97
68c00e3e
IM
/* Appends to 'args' the EAL options that map one-to-one onto keys of
 * 'ovs_other_config' (the flat, non mutually exclusive options).
 *
 * For every table entry, the database value (or the entry's default value
 * when its default is enabled) is added as an "<option> <value>" pair.  If
 * the option is already present in 'args', the database value is ignored and
 * a warning is logged instead. */
static void
construct_dpdk_options(const struct smap *ovs_other_config, struct svec *args)
{
    struct dpdk_options_map {
        const char *ovs_configuration;
        const char *dpdk_option;
        bool default_enabled;
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask",   "-c",             false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir",     false, NULL},
        {"dpdk-socket-limit", "--socket-limit", false, NULL},
    };
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(opts); idx++) {
        const struct dpdk_options_map *opt = &opts[idx];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        /* Fall back to the table default only when nothing is configured. */
        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            continue;
        }

        if (args_contains(args, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk-extra config", opt->dpdk_option);
            continue;
        }

        svec_add(args, opt->dpdk_option);
        svec_add(args, value);
    }
}
133
7189d54c
MR
134static char *
135construct_dpdk_socket_mem(void)
136{
7189d54c 137 const char *def_value = "1024";
9c68ca34
IM
138 int numa, numa_nodes = ovs_numa_get_n_numas();
139 struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
7189d54c
MR
140
141 if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) {
142 numa_nodes = 1;
143 }
7189d54c 144
9c68ca34 145 ds_put_cstr(&dpdk_socket_mem, def_value);
7189d54c 146 for (numa = 1; numa < numa_nodes; ++numa) {
9c68ca34 147 ds_put_format(&dpdk_socket_mem, ",%s", def_value);
7189d54c
MR
148 }
149
9c68ca34 150 return ds_cstr(&dpdk_socket_mem);
7189d54c
MR
151}
152
01961bbd
DDP
#define MAX_DPDK_EXCL_OPTS 10

/* Appends to 'args' the EAL options that are mutually exclusive within a
 * category (currently only "memory type": "-m" versus "--socket-mem").
 *
 * For each category, the keys in 'ovs_other_config' are scanned in table
 * order and the last one with a non-empty value wins; if more than one is
 * set an error is logged.  If none is set, the category's default option and
 * value are used when 'default_option' is nonzero, otherwise the category is
 * skipped.  If the selected EAL option is already present in 'args', it is
 * not added again and a warning is logged. */
static void
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             struct svec *args)
{
    char *default_dpdk_socket_mem = construct_dpdk_socket_mem();

    struct dpdk_exclusive_options_map {
        const char *category;
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;
    } excl_opts[] = {
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m",             "--socket-mem",    NULL,},
         default_dpdk_socket_mem, 1
        },
    };

    int cat;

    for (cat = 0; cat < ARRAY_SIZE(excl_opts); cat++) {
        struct dpdk_exclusive_options_map *group = &excl_opts[cat];
        const char *chosen_value;
        const char *eal_option;
        int n_set = 0;
        int chosen = -1;
        int k;

        /* Find which of the category's database keys are set; the last one
         * found is the one that gets used. */
        for (k = 0; k < MAX_DPDK_EXCL_OPTS && group->ovs_dpdk_options[k];
             k++) {
            const char *value = smap_get(ovs_other_config,
                                         group->ovs_dpdk_options[k]);

            if (!value || *value == '\0') {
                continue;
            }
            n_set++;
            chosen = k;
            chosen_value = value;
        }

        if (n_set == 0) {
            if (!group->default_option) {
                continue;
            }
            chosen = group->default_option;
            chosen_value = group->default_value;
        } else if (n_set > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     group->category);
        }

        eal_option = group->eal_dpdk_options[chosen];
        if (args_contains(args, eal_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk-extra config", eal_option);
        } else {
            svec_add(args, eal_option);
            svec_add(args, chosen_value);
        }
    }

    free(default_dpdk_socket_mem);
}
218
68c00e3e
IM
/* Fills 'args' with the EAL arguments derived from 'ovs_other_config'.
 *
 * The words of the "dpdk-extra" value, if any, are added first; the
 * database-defined flat options and mutually exclusive options are added
 * afterwards. */
static void
construct_dpdk_args(const struct smap *ovs_other_config, struct svec *args)
{
    const char *extra = smap_get(ovs_other_config, "dpdk-extra");

    if (extra != NULL) {
        svec_parse_words(args, extra);
    }

    construct_dpdk_options(ovs_other_config, args);
    construct_dpdk_mutex_options(ovs_other_config, args);
}
231
736ca516
IM
232static ssize_t
233dpdk_log_write(void *c OVS_UNUSED, const char *buf, size_t size)
234{
9fd38f68
IM
235 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(600, 600);
236 static struct vlog_rate_limit dbg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
736ca516
IM
237
238 switch (rte_log_cur_msg_loglevel()) {
239 case RTE_LOG_DEBUG:
d0d1a76e 240 VLOG_DBG_RL(&dbg_rl, "%.*s", (int) size, buf);
736ca516
IM
241 break;
242 case RTE_LOG_INFO:
243 case RTE_LOG_NOTICE:
d0d1a76e 244 VLOG_INFO_RL(&rl, "%.*s", (int) size, buf);
736ca516
IM
245 break;
246 case RTE_LOG_WARNING:
d0d1a76e 247 VLOG_WARN_RL(&rl, "%.*s", (int) size, buf);
736ca516
IM
248 break;
249 case RTE_LOG_ERR:
d0d1a76e 250 VLOG_ERR_RL(&rl, "%.*s", (int) size, buf);
736ca516
IM
251 break;
252 case RTE_LOG_CRIT:
253 case RTE_LOG_ALERT:
254 case RTE_LOG_EMERG:
d0d1a76e 255 VLOG_EMER("%.*s", (int) size, buf);
736ca516
IM
256 break;
257 default:
258 OVS_NOT_REACHED();
259 }
260
736ca516
IM
261 return size;
262}
263
264static cookie_io_functions_t dpdk_log_func = {
265 .write = dpdk_log_write,
266};
267
d7e2509e 268static bool
01961bbd
DDP
269dpdk_init__(const struct smap *ovs_other_config)
270{
68c00e3e
IM
271 char *sock_dir_subcomponent;
272 char **argv = NULL;
01961bbd 273 int result;
01961bbd
DDP
274 bool auto_determine = true;
275 int err = 0;
15d8655b 276 struct ovs_numa_dump *affinity = NULL;
68c00e3e 277 struct svec args = SVEC_EMPTY_INITIALIZER;
01961bbd 278
736ca516
IM
279 log_stream = fopencookie(NULL, "w+", dpdk_log_func);
280 if (log_stream == NULL) {
281 VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno));
282 } else {
283 setbuf(log_stream, NULL);
284 rte_openlog_stream(log_stream);
285 }
286
6c4f08e2 287 if (process_vhost_flags("vhost-sock-dir", ovs_rundir(),
01961bbd
DDP
288 NAME_MAX, ovs_other_config,
289 &sock_dir_subcomponent)) {
290 struct stat s;
291 if (!strstr(sock_dir_subcomponent, "..")) {
292 vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(),
293 sock_dir_subcomponent);
294
295 err = stat(vhost_sock_dir, &s);
296 if (err) {
297 VLOG_ERR("vhost-user sock directory '%s' does not exist.",
298 vhost_sock_dir);
299 }
300 } else {
301 vhost_sock_dir = xstrdup(ovs_rundir());
302 VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid"
303 "characters '..' - using %s instead.",
304 ovs_rundir(), sock_dir_subcomponent, ovs_rundir());
305 }
306 free(sock_dir_subcomponent);
307 } else {
308 vhost_sock_dir = sock_dir_subcomponent;
309 }
310
a14d1cc8
MK
311 vhost_iommu_enabled = smap_get_bool(ovs_other_config,
312 "vhost-iommu-support", false);
313 VLOG_INFO("IOMMU support for vhost-user-client %s.",
314 vhost_iommu_enabled ? "enabled" : "disabled");
315
30e834dc
LB
316 vhost_postcopy_enabled = smap_get_bool(ovs_other_config,
317 "vhost-postcopy-support", false);
318 if (vhost_postcopy_enabled && memory_locked()) {
319 VLOG_WARN("vhost-postcopy-support and mlockall are not compatible.");
320 vhost_postcopy_enabled = false;
321 }
322 VLOG_INFO("POSTCOPY support for vhost-user-client %s.",
323 vhost_postcopy_enabled ? "enabled" : "disabled");
324
43307ad0
IS
325 per_port_memory = smap_get_bool(ovs_other_config,
326 "per-port-memory", false);
327 VLOG_INFO("Per port memory for DPDK devices %s.",
328 per_port_memory ? "enabled" : "disabled");
329
68c00e3e
IM
330 svec_add(&args, ovs_get_program_name());
331 construct_dpdk_args(ovs_other_config, &args);
01961bbd 332
8411b6cc
IM
333 if (!args_contains(&args, "--legacy-mem")
334 && !args_contains(&args, "--socket-limit")) {
335 const char *arg;
336 size_t i;
337
338 SVEC_FOR_EACH (i, arg, &args) {
339 if (!strcmp(arg, "--socket-mem")) {
340 break;
341 }
342 }
343 if (i < args.n - 1) {
344 svec_add(&args, "--socket-limit");
345 svec_add(&args, args.names[i + 1]);
346 }
347 }
348
68c00e3e
IM
349 if (args_contains(&args, "-c") || args_contains(&args, "-l")) {
350 auto_determine = false;
01961bbd 351 }
01961bbd
DDP
352
353 /**
354 * NOTE: This is an unsophisticated mechanism for determining the DPDK
355 * lcore for the DPDK Master.
356 */
357 if (auto_determine) {
15d8655b 358 const struct ovs_numa_info_core *core;
68c00e3e
IM
359 int cpu = 0;
360
01961bbd 361 /* Get the main thread affinity */
15d8655b
IM
362 affinity = ovs_numa_thread_getaffinity_dump();
363 if (affinity) {
364 cpu = INT_MAX;
365 FOR_EACH_CORE_ON_DUMP (core, affinity) {
366 if (cpu > core->core_id) {
367 cpu = core->core_id;
01961bbd
DDP
368 }
369 }
370 } else {
01961bbd 371 /* User did not set dpdk-lcore-mask and unable to get current
68c00e3e 372 * thread affintity - default to core #0 */
15d8655b 373 VLOG_ERR("Thread getaffinity failed. Using core #0");
01961bbd 374 }
68c00e3e
IM
375 svec_add(&args, "-l");
376 svec_add_nocopy(&args, xasprintf("%d", cpu));
01961bbd
DDP
377 }
378
68c00e3e 379 svec_terminate(&args);
01961bbd
DDP
380
381 optind = 1;
382
383 if (VLOG_IS_INFO_ENABLED()) {
68c00e3e
IM
384 struct ds eal_args = DS_EMPTY_INITIALIZER;
385 char *joined_args = svec_join(&args, " ", ".");
386
387 ds_put_format(&eal_args, "EAL ARGS: %s", joined_args);
01961bbd
DDP
388 VLOG_INFO("%s", ds_cstr_ro(&eal_args));
389 ds_destroy(&eal_args);
68c00e3e 390 free(joined_args);
01961bbd
DDP
391 }
392
68c00e3e
IM
393 /* Copy because 'rte_eal_init' will change the argv, i.e. it will remove
394 * some arguments from it. '+1' to copy the terminating NULL. */
395 argv = xmemdup(args.names, (args.n + 1) * sizeof args.names[0]);
fe11b9e0 396
01961bbd 397 /* Make sure things are initialized ... */
68c00e3e
IM
398 result = rte_eal_init(args.n, argv);
399
400 free(argv);
401 svec_destroy(&args);
01961bbd
DDP
402
403 /* Set the main thread affinity back to pre rte_eal_init() value */
15d8655b
IM
404 if (affinity) {
405 ovs_numa_thread_setaffinity_dump(affinity);
406 ovs_numa_dump_destroy(affinity);
01961bbd
DDP
407 }
408
d7e2509e
AC
409 if (result < 0) {
410 VLOG_EMER("Unable to initialize DPDK: %s", ovs_strerror(rte_errno));
411 return false;
412 }
413
450ff2bc
IM
414 if (VLOG_IS_DBG_ENABLED()) {
415 size_t size;
416 char *response = NULL;
417 FILE *stream = open_memstream(&response, &size);
418
419 if (stream) {
420 rte_memzone_dump(stream);
421 fclose(stream);
422 if (size) {
423 VLOG_DBG("rte_memzone_dump:\n%s", response);
424 }
425 free(response);
426 } else {
427 VLOG_DBG("Could not dump memzone. Unable to open memstream: %s.",
428 ovs_strerror(errno));
429 }
430 }
01961bbd
DDP
431
432 /* We are called from the main thread here */
433 RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
434
435#ifdef DPDK_PDUMP
436 VLOG_INFO("DPDK pdump packet capture enabled");
4ae8c461
IM
437 VLOG_WARN("DPDK pdump support is deprecated and "
438 "will be removed in next OVS releases.");
01961bbd
DDP
439 err = rte_pdump_init(ovs_rundir());
440 if (err) {
441 VLOG_INFO("Error initialising DPDK pdump");
442 rte_pdump_uninit();
443 } else {
444 char *server_socket_path;
445
446 server_socket_path = xasprintf("%s/%s", ovs_rundir(),
447 "pdump_server_socket");
448 fatal_signal_add_file_to_unlink(server_socket_path);
449 free(server_socket_path);
450 }
451#endif
452
453 /* Finally, register the dpdk classes */
454 netdev_dpdk_register();
4f746d52 455 netdev_register_flow_api_provider(&netdev_offload_dpdk);
d7e2509e 456 return true;
01961bbd
DDP
457}
458
459void
460dpdk_init(const struct smap *ovs_other_config)
461{
ec2b0701
DDP
462 static bool enabled = false;
463
464 if (enabled || !ovs_other_config) {
465 return;
466 }
467
3e52fa56
AC
468 const char *dpdk_init_val = smap_get_def(ovs_other_config, "dpdk-init",
469 "false");
470
6455316b
IM
471 bool try_only = !strcasecmp(dpdk_init_val, "try");
472 if (!strcasecmp(dpdk_init_val, "true") || try_only) {
ec2b0701 473 static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
01961bbd 474
ec2b0701 475 if (ovsthread_once_start(&once_enable)) {
40c23a57 476 VLOG_INFO("Using %s", rte_version());
ec2b0701 477 VLOG_INFO("DPDK Enabled - initializing...");
d7e2509e
AC
478 enabled = dpdk_init__(ovs_other_config);
479 if (enabled) {
480 VLOG_INFO("DPDK Enabled - initialized");
3e52fa56 481 } else if (!try_only) {
d7e2509e
AC
482 ovs_abort(rte_errno, "Cannot init EAL");
483 }
ec2b0701 484 ovsthread_once_done(&once_enable);
d7e2509e
AC
485 } else {
486 VLOG_ERR_ONCE("DPDK Initialization Failed.");
ec2b0701
DDP
487 }
488 } else {
5575908b 489 VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable");
01961bbd 490 }
3e52fa56 491 dpdk_initialized = enabled;
01961bbd
DDP
492}
493
494const char *
495dpdk_get_vhost_sock_dir(void)
496{
497 return vhost_sock_dir;
498}
499
a14d1cc8
MK
500bool
501dpdk_vhost_iommu_enabled(void)
502{
503 return vhost_iommu_enabled;
504}
505
30e834dc
LB
506bool
507dpdk_vhost_postcopy_enabled(void)
508{
509 return vhost_postcopy_enabled;
510}
511
43307ad0
IS
512bool
513dpdk_per_port_memory(void)
514{
515 return per_port_memory;
516}
517
1276e3db
IM
518bool
519dpdk_available(void)
520{
521 return dpdk_initialized;
522}
523
01961bbd
DDP
524void
525dpdk_set_lcore_id(unsigned cpu)
526{
527 /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
528 ovs_assert(cpu != NON_PMD_CORE_ID);
529 RTE_PER_LCORE(_lcore_id) = cpu;
530}
40c23a57
MC
531
/* Prints the string returned by rte_version(), followed by a newline, to
 * standard output. */
void
print_dpdk_version(void)
{
    printf("%s\n", rte_version());
}
3e52fa56
AC
537
538void
539dpdk_status(const struct ovsrec_open_vswitch *cfg)
540{
541 if (cfg) {
542 ovsrec_open_vswitch_set_dpdk_initialized(cfg, dpdk_initialized);
543 ovsrec_open_vswitch_set_dpdk_version(cfg, rte_version());
544 }
545}