]> git.proxmox.com Git - ovs.git/blame - lib/dpdk.c
tc: Support new terse dump kernel API
[ovs.git] / lib / dpdk.c
CommitLineData
01961bbd 1/*
5575908b 2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
01961bbd
DDP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpdk.h"
19
736ca516 20#include <stdio.h>
01961bbd
DDP
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <getopt.h>
24
d7e2509e 25#include <rte_errno.h>
736ca516 26#include <rte_log.h>
01961bbd 27#include <rte_memzone.h>
40c23a57 28#include <rte_version.h>
01961bbd
DDP
29
30#include "dirs.h"
a0cbc627 31#include "fatal-signal.h"
01961bbd 32#include "netdev-dpdk.h"
b6cabb8f 33#include "netdev-offload-provider.h"
01961bbd
DDP
34#include "openvswitch/dynamic-string.h"
35#include "openvswitch/vlog.h"
7189d54c 36#include "ovs-numa.h"
01961bbd 37#include "smap.h"
68c00e3e 38#include "svec.h"
30e834dc 39#include "util.h"
3e52fa56 40#include "vswitch-idl.h"
01961bbd
DDP
41
42VLOG_DEFINE_THIS_MODULE(dpdk);
43
736ca516
IM
/* Stream for DPDK log redirection. */
static FILE *log_stream = NULL;

/* Location of vhost-user sockets. */
static char *vhost_sock_dir = NULL;

/* Status of vHost IOMMU support. */
static bool vhost_iommu_enabled = false;

/* Status of vHost POSTCOPY support. */
static bool vhost_postcopy_enabled = false;

/* Indicates successful initialization of DPDK. */
static bool dpdk_initialized = false;

/* Status of per port memory support. */
static bool per_port_memory = false;
01961bbd
DDP
53
/* Looks up 'flag' in 'ovs_other_config' and stores a freshly allocated copy
 * of the value to use in '*new_val'; the caller owns (and must free) that
 * string.
 *
 * The user-provided value is used only when it is present and no longer than
 * 'size' characters; otherwise a copy of 'default_val' is stored instead.
 *
 * Returns 1 if the user-provided value was taken, 0 if the default was
 * used. */
static int
process_vhost_flags(char *flag, const char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    /* 'size' is converted to size_t explicitly; this is the conversion the
     * comparison always performed implicitly. */
    if (val && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    if (val) {
        /* A value was configured but rejected: say so rather than claiming
         * that nothing was provided. */
        VLOG_WARN("User-provided %s is longer than %d characters - "
                  "defaulting to %s", flag, size, default_val);
    } else {
        VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    }
    *new_val = xstrdup(default_val);

    return 0;
}
78
01961bbd 79static bool
68c00e3e 80args_contains(const struct svec *args, const char *value)
01961bbd 81{
68c00e3e
IM
82 const char *arg;
83 size_t i;
84
85 /* We can't just use 'svec_contains' because args are not sorted. */
86 SVEC_FOR_EACH (i, arg, args) {
87 if (!strcmp(arg, value)) {
01961bbd 88 return true;
68c00e3e 89 }
01961bbd
DDP
90 }
91 return false;
92}
93
68c00e3e
IM
/* Appends to 'args' the EAL options that map one-to-one onto keys of
 * 'ovs_other_config'.
 *
 * An option is appended as two elements (the EAL flag, then its value).  If
 * the EAL flag is already present in 'args', the database value is ignored
 * and a warning is logged. */
static void
construct_dpdk_options(const struct smap *ovs_other_config, struct svec *args)
{
    static const struct dpdk_options_map {
        const char *ovs_configuration;  /* Key in 'ovs_other_config'. */
        const char *dpdk_option;        /* Matching EAL flag. */
        bool default_enabled;           /* Use 'default_value' if unset? */
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask",   "-c",             false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir",     false, NULL},
        {"dpdk-socket-limit", "--socket-limit", false, NULL},
    };
    size_t idx;

    /* These are the flat (non mutually exclusive) options. */
    for (idx = 0; idx < ARRAY_SIZE(opts); idx++) {
        const struct dpdk_options_map *opt = &opts[idx];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            /* Neither configured nor defaulted: nothing to add. */
            continue;
        }

        if (args_contains(args, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk-extra config", opt->dpdk_option);
        } else {
            svec_add(args, opt->dpdk_option);
            svec_add(args, value);
        }
    }
}
129
7189d54c
MR
130static char *
131construct_dpdk_socket_mem(void)
132{
7189d54c 133 const char *def_value = "1024";
9c68ca34
IM
134 int numa, numa_nodes = ovs_numa_get_n_numas();
135 struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER;
7189d54c
MR
136
137 if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) {
138 numa_nodes = 1;
139 }
7189d54c 140
9c68ca34 141 ds_put_cstr(&dpdk_socket_mem, def_value);
7189d54c 142 for (numa = 1; numa < numa_nodes; ++numa) {
9c68ca34 143 ds_put_format(&dpdk_socket_mem, ",%s", def_value);
7189d54c
MR
144 }
145
9c68ca34 146 return ds_cstr(&dpdk_socket_mem);
7189d54c
MR
147}
148
01961bbd
DDP
/* Maximum number of alternatives in one group of mutually exclusive
 * options (including the terminating NULL entry). */
#define MAX_DPDK_EXCL_OPTS 10

/* Appends to 'args' the EAL options that are mutually exclusive within a
 * category (currently only the "memory type" category).
 *
 * For each category the non-empty alternatives present in
 * 'ovs_other_config' are counted.  With none present, the category default
 * is used if it has one.  With several present, an error is logged and the
 * last one found is used.  The selected option is skipped, with a warning,
 * if its EAL flag already appears in 'args'. */
static void
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             struct svec *args)
{
    char *default_dpdk_socket_mem = construct_dpdk_socket_mem();

    struct dpdk_exclusive_options_map {
        const char *category;
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;     /* Index of the default alternative; zero
                                 * means the category has no default. */
    } excl_opts[] = {
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m",             "--socket-mem",    NULL,},
         default_dpdk_socket_mem, 1
        },
    };
    size_t idx;

    for (idx = 0; idx < ARRAY_SIZE(excl_opts); idx++) {
        struct dpdk_exclusive_options_map *entry = &excl_opts[idx];
        const char *chosen_value = NULL;
        int chosen_pos = -1;
        int n_found = 0;
        int pos;

        /* Count the configured alternatives, remembering the last one. */
        for (pos = 0;
             pos < MAX_DPDK_EXCL_OPTS && entry->ovs_dpdk_options[pos];
             pos++) {
            const char *value = smap_get(ovs_other_config,
                                         entry->ovs_dpdk_options[pos]);

            if (value && strlen(value)) {
                n_found++;
                chosen_pos = pos;
                chosen_value = value;
            }
        }

        if (n_found == 0) {
            if (!entry->default_option) {
                continue;
            }
            chosen_pos = entry->default_option;
            chosen_value = entry->default_value;
        } else if (n_found > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     entry->category);
        }

        if (args_contains(args, entry->eal_dpdk_options[chosen_pos])) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk-extra config",
                      entry->eal_dpdk_options[chosen_pos]);
        } else {
            svec_add(args, entry->eal_dpdk_options[chosen_pos]);
            svec_add(args, chosen_value);
        }
    }

    free(default_dpdk_socket_mem);
}
214
68c00e3e
IM
/* Fills 'args' with the EAL arguments derived from 'ovs_other_config'.
 *
 * The words of "dpdk-extra" (if set) are added first, so that the flags
 * given there are already present when the database-defined options are
 * processed. */
static void
construct_dpdk_args(const struct smap *ovs_other_config, struct svec *args)
{
    const char *dpdk_extra = smap_get(ovs_other_config, "dpdk-extra");

    if (dpdk_extra != NULL) {
        svec_parse_words(args, dpdk_extra);
    }

    construct_dpdk_options(ovs_other_config, args);
    construct_dpdk_mutex_options(ovs_other_config, args);
}
227
736ca516
IM
228static ssize_t
229dpdk_log_write(void *c OVS_UNUSED, const char *buf, size_t size)
230{
9fd38f68
IM
231 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(600, 600);
232 static struct vlog_rate_limit dbg_rl = VLOG_RATE_LIMIT_INIT(600, 600);
736ca516
IM
233
234 switch (rte_log_cur_msg_loglevel()) {
235 case RTE_LOG_DEBUG:
d0d1a76e 236 VLOG_DBG_RL(&dbg_rl, "%.*s", (int) size, buf);
736ca516
IM
237 break;
238 case RTE_LOG_INFO:
239 case RTE_LOG_NOTICE:
d0d1a76e 240 VLOG_INFO_RL(&rl, "%.*s", (int) size, buf);
736ca516
IM
241 break;
242 case RTE_LOG_WARNING:
d0d1a76e 243 VLOG_WARN_RL(&rl, "%.*s", (int) size, buf);
736ca516
IM
244 break;
245 case RTE_LOG_ERR:
d0d1a76e 246 VLOG_ERR_RL(&rl, "%.*s", (int) size, buf);
736ca516
IM
247 break;
248 case RTE_LOG_CRIT:
249 case RTE_LOG_ALERT:
250 case RTE_LOG_EMERG:
d0d1a76e 251 VLOG_EMER("%.*s", (int) size, buf);
736ca516
IM
252 break;
253 default:
254 OVS_NOT_REACHED();
255 }
256
736ca516
IM
257 return size;
258}
259
260static cookie_io_functions_t dpdk_log_func = {
261 .write = dpdk_log_write,
262};
263
d7e2509e 264static bool
01961bbd
DDP
265dpdk_init__(const struct smap *ovs_other_config)
266{
68c00e3e
IM
267 char *sock_dir_subcomponent;
268 char **argv = NULL;
01961bbd 269 int result;
01961bbd
DDP
270 bool auto_determine = true;
271 int err = 0;
15d8655b 272 struct ovs_numa_dump *affinity = NULL;
68c00e3e 273 struct svec args = SVEC_EMPTY_INITIALIZER;
01961bbd 274
736ca516
IM
275 log_stream = fopencookie(NULL, "w+", dpdk_log_func);
276 if (log_stream == NULL) {
277 VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno));
278 } else {
279 setbuf(log_stream, NULL);
280 rte_openlog_stream(log_stream);
281 }
282
6c4f08e2 283 if (process_vhost_flags("vhost-sock-dir", ovs_rundir(),
01961bbd
DDP
284 NAME_MAX, ovs_other_config,
285 &sock_dir_subcomponent)) {
286 struct stat s;
287 if (!strstr(sock_dir_subcomponent, "..")) {
288 vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(),
289 sock_dir_subcomponent);
290
291 err = stat(vhost_sock_dir, &s);
292 if (err) {
293 VLOG_ERR("vhost-user sock directory '%s' does not exist.",
294 vhost_sock_dir);
295 }
296 } else {
297 vhost_sock_dir = xstrdup(ovs_rundir());
298 VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid"
299 "characters '..' - using %s instead.",
300 ovs_rundir(), sock_dir_subcomponent, ovs_rundir());
301 }
302 free(sock_dir_subcomponent);
303 } else {
304 vhost_sock_dir = sock_dir_subcomponent;
305 }
306
a14d1cc8
MK
307 vhost_iommu_enabled = smap_get_bool(ovs_other_config,
308 "vhost-iommu-support", false);
309 VLOG_INFO("IOMMU support for vhost-user-client %s.",
310 vhost_iommu_enabled ? "enabled" : "disabled");
311
30e834dc
LB
312 vhost_postcopy_enabled = smap_get_bool(ovs_other_config,
313 "vhost-postcopy-support", false);
314 if (vhost_postcopy_enabled && memory_locked()) {
315 VLOG_WARN("vhost-postcopy-support and mlockall are not compatible.");
316 vhost_postcopy_enabled = false;
317 }
318 VLOG_INFO("POSTCOPY support for vhost-user-client %s.",
319 vhost_postcopy_enabled ? "enabled" : "disabled");
320
43307ad0
IS
321 per_port_memory = smap_get_bool(ovs_other_config,
322 "per-port-memory", false);
323 VLOG_INFO("Per port memory for DPDK devices %s.",
324 per_port_memory ? "enabled" : "disabled");
325
68c00e3e
IM
326 svec_add(&args, ovs_get_program_name());
327 construct_dpdk_args(ovs_other_config, &args);
01961bbd 328
8411b6cc
IM
329 if (!args_contains(&args, "--legacy-mem")
330 && !args_contains(&args, "--socket-limit")) {
331 const char *arg;
332 size_t i;
333
334 SVEC_FOR_EACH (i, arg, &args) {
335 if (!strcmp(arg, "--socket-mem")) {
336 break;
337 }
338 }
339 if (i < args.n - 1) {
340 svec_add(&args, "--socket-limit");
341 svec_add(&args, args.names[i + 1]);
342 }
343 }
344
68c00e3e
IM
345 if (args_contains(&args, "-c") || args_contains(&args, "-l")) {
346 auto_determine = false;
01961bbd 347 }
01961bbd
DDP
348
349 /**
350 * NOTE: This is an unsophisticated mechanism for determining the DPDK
351 * lcore for the DPDK Master.
352 */
353 if (auto_determine) {
15d8655b 354 const struct ovs_numa_info_core *core;
68c00e3e
IM
355 int cpu = 0;
356
01961bbd 357 /* Get the main thread affinity */
15d8655b
IM
358 affinity = ovs_numa_thread_getaffinity_dump();
359 if (affinity) {
360 cpu = INT_MAX;
361 FOR_EACH_CORE_ON_DUMP (core, affinity) {
362 if (cpu > core->core_id) {
363 cpu = core->core_id;
01961bbd
DDP
364 }
365 }
366 } else {
01961bbd 367 /* User did not set dpdk-lcore-mask and unable to get current
68c00e3e 368 * thread affintity - default to core #0 */
15d8655b 369 VLOG_ERR("Thread getaffinity failed. Using core #0");
01961bbd 370 }
68c00e3e
IM
371 svec_add(&args, "-l");
372 svec_add_nocopy(&args, xasprintf("%d", cpu));
01961bbd
DDP
373 }
374
68c00e3e 375 svec_terminate(&args);
01961bbd
DDP
376
377 optind = 1;
378
379 if (VLOG_IS_INFO_ENABLED()) {
68c00e3e
IM
380 struct ds eal_args = DS_EMPTY_INITIALIZER;
381 char *joined_args = svec_join(&args, " ", ".");
382
383 ds_put_format(&eal_args, "EAL ARGS: %s", joined_args);
01961bbd
DDP
384 VLOG_INFO("%s", ds_cstr_ro(&eal_args));
385 ds_destroy(&eal_args);
68c00e3e 386 free(joined_args);
01961bbd
DDP
387 }
388
68c00e3e
IM
389 /* Copy because 'rte_eal_init' will change the argv, i.e. it will remove
390 * some arguments from it. '+1' to copy the terminating NULL. */
391 argv = xmemdup(args.names, (args.n + 1) * sizeof args.names[0]);
fe11b9e0 392
01961bbd 393 /* Make sure things are initialized ... */
68c00e3e
IM
394 result = rte_eal_init(args.n, argv);
395
396 free(argv);
397 svec_destroy(&args);
01961bbd
DDP
398
399 /* Set the main thread affinity back to pre rte_eal_init() value */
15d8655b
IM
400 if (affinity) {
401 ovs_numa_thread_setaffinity_dump(affinity);
402 ovs_numa_dump_destroy(affinity);
01961bbd
DDP
403 }
404
d7e2509e
AC
405 if (result < 0) {
406 VLOG_EMER("Unable to initialize DPDK: %s", ovs_strerror(rte_errno));
407 return false;
408 }
409
450ff2bc
IM
410 if (VLOG_IS_DBG_ENABLED()) {
411 size_t size;
412 char *response = NULL;
413 FILE *stream = open_memstream(&response, &size);
414
415 if (stream) {
416 rte_memzone_dump(stream);
417 fclose(stream);
418 if (size) {
419 VLOG_DBG("rte_memzone_dump:\n%s", response);
420 }
421 free(response);
422 } else {
423 VLOG_DBG("Could not dump memzone. Unable to open memstream: %s.",
424 ovs_strerror(errno));
425 }
426 }
01961bbd
DDP
427
428 /* We are called from the main thread here */
429 RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
430
01961bbd
DDP
431 /* Finally, register the dpdk classes */
432 netdev_dpdk_register();
4f746d52 433 netdev_register_flow_api_provider(&netdev_offload_dpdk);
d7e2509e 434 return true;
01961bbd
DDP
435}
436
437void
438dpdk_init(const struct smap *ovs_other_config)
439{
ec2b0701
DDP
440 static bool enabled = false;
441
442 if (enabled || !ovs_other_config) {
443 return;
444 }
445
3e52fa56
AC
446 const char *dpdk_init_val = smap_get_def(ovs_other_config, "dpdk-init",
447 "false");
448
6455316b
IM
449 bool try_only = !strcasecmp(dpdk_init_val, "try");
450 if (!strcasecmp(dpdk_init_val, "true") || try_only) {
ec2b0701 451 static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
01961bbd 452
ec2b0701 453 if (ovsthread_once_start(&once_enable)) {
40c23a57 454 VLOG_INFO("Using %s", rte_version());
ec2b0701 455 VLOG_INFO("DPDK Enabled - initializing...");
d7e2509e
AC
456 enabled = dpdk_init__(ovs_other_config);
457 if (enabled) {
458 VLOG_INFO("DPDK Enabled - initialized");
3e52fa56 459 } else if (!try_only) {
d7e2509e
AC
460 ovs_abort(rte_errno, "Cannot init EAL");
461 }
ec2b0701 462 ovsthread_once_done(&once_enable);
d7e2509e
AC
463 } else {
464 VLOG_ERR_ONCE("DPDK Initialization Failed.");
ec2b0701
DDP
465 }
466 } else {
5575908b 467 VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable");
01961bbd 468 }
3e52fa56 469 dpdk_initialized = enabled;
01961bbd
DDP
470}
471
472const char *
473dpdk_get_vhost_sock_dir(void)
474{
475 return vhost_sock_dir;
476}
477
a14d1cc8
MK
478bool
479dpdk_vhost_iommu_enabled(void)
480{
481 return vhost_iommu_enabled;
482}
483
30e834dc
LB
484bool
485dpdk_vhost_postcopy_enabled(void)
486{
487 return vhost_postcopy_enabled;
488}
489
43307ad0
IS
490bool
491dpdk_per_port_memory(void)
492{
493 return per_port_memory;
494}
495
1276e3db
IM
496bool
497dpdk_available(void)
498{
499 return dpdk_initialized;
500}
501
01961bbd
DDP
502void
503dpdk_set_lcore_id(unsigned cpu)
504{
505 /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
506 ovs_assert(cpu != NON_PMD_CORE_ID);
507 RTE_PER_LCORE(_lcore_id) = cpu;
508}
40c23a57
MC
509
/* Prints the DPDK version string, followed by a newline, on stdout. */
void
print_dpdk_version(void)
{
    const char *version = rte_version();

    printf("%s\n", version);
}
3e52fa56
AC
515
516void
517dpdk_status(const struct ovsrec_open_vswitch *cfg)
518{
519 if (cfg) {
520 ovsrec_open_vswitch_set_dpdk_initialized(cfg, dpdk_initialized);
521 ovsrec_open_vswitch_set_dpdk_version(cfg, rte_version());
522 }
523}