]> git.proxmox.com Git - ovs.git/blame - lib/dpdk.c
netdev-dpdk: fix port addition for ports sharing same PCI id
[ovs.git] / lib / dpdk.c
CommitLineData
01961bbd 1/*
5575908b 2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
01961bbd
DDP
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <config.h>
18#include "dpdk.h"
19
736ca516 20#include <stdio.h>
01961bbd
DDP
21#include <sys/types.h>
22#include <sys/stat.h>
23#include <getopt.h>
24
736ca516 25#include <rte_log.h>
01961bbd 26#include <rte_memzone.h>
a0cbc627
CL
27#ifdef DPDK_PDUMP
28#include <rte_mempool.h>
29#include <rte_pdump.h>
30#endif
01961bbd
DDP
31
32#include "dirs.h"
a0cbc627 33#include "fatal-signal.h"
01961bbd
DDP
34#include "netdev-dpdk.h"
35#include "openvswitch/dynamic-string.h"
36#include "openvswitch/vlog.h"
37#include "smap.h"
38
39VLOG_DEFINE_THIS_MODULE(dpdk);
40
736ca516
IM
/* Stream handed to DPDK for log redirection; opened in dpdk_init__(). */
static FILE *log_stream = NULL;

/* Location of the vhost-user sockets; assigned in dpdk_init__(). */
static char *vhost_sock_dir = NULL;

/* Status of vHost IOMMU support ("vhost-iommu-support" in other_config). */
static bool vhost_iommu_enabled = false;
01961bbd
DDP
45
/* Looks up 'flag' in 'ovs_other_config' and stores in '*new_val' a copy
 * (made with xstrdup()) of the value that should be used.  The caller is
 * responsible for releasing '*new_val'.
 *
 * A configured value is only accepted when it is at most 'size' bytes long.
 * When the key is absent, or its value is too long, a copy of 'default_val'
 * is stored instead.
 *
 * Returns 1 if the user-provided value was taken, 0 if the default was. */
static int
process_vhost_flags(char *flag, const char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    /* Compare lengths as size_t only once 'size' is known to be
     * non-negative; otherwise the implicit int -> size_t conversion would
     * turn a negative limit into a huge one. */
    if (val && size >= 0 && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    if (val) {
        /* The key was set, so say why it is being ignored before the
         * generic "defaulting" message below. */
        VLOG_WARN("Ignoring %s value: longer than %d characters", flag, size);
    }
    VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    *new_val = xstrdup(default_val);

    return 0;
}
70
/* Resizes the pointer array '*argv', which currently has 'cur_siz' slots, so
 * that it has 'grow_by' additional slots, and returns the resized array.
 *
 * '*argv' itself is not updated: the caller must store the return value. */
static char **
grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
{
    size_t n_slots = cur_siz + grow_by;

    return xrealloc(*argv, n_slots * sizeof(char *));
}
76
/* Appends the pair 'option', 'value' (both duplicated with xstrdup()) to the
 * argument vector '*argv', which currently holds 'argc' entries.  '*argv' is
 * updated to point to the grown array; the caller must account for the two
 * new entries in its own count. */
static void
dpdk_option_extend(char ***argv, int argc, const char *option,
                   const char *value)
{
    char **grown = grow_argv(argv, argc, 2);

    grown[argc] = xstrdup(option);
    grown[argc + 1] = xstrdup(value);
    *argv = grown;
}
86
/* Grows '*argv' (currently 'cur_size' entries) by 'src_argc' slots and moves
 * the 'src_argc' pointers of 'src_argv' into the new slots, in order.  Every
 * moved entry of 'src_argv' is reset to NULL.
 *
 * Returns the grown array; '*argv' is not updated by this function. */
static char **
move_argv(char ***argv, size_t cur_size, char **src_argv, size_t src_argc)
{
    char **dst = grow_argv(argv, cur_size, src_argc);
    size_t remaining;

    /* Walk from the last source entry down to the first. */
    for (remaining = src_argc; remaining > 0; remaining--) {
        size_t idx = remaining - 1;

        dst[cur_size + idx] = src_argv[idx];
        src_argv[idx] = NULL;
    }

    return dst;
}
97
/* Splits 'ovs_extra_config' at space characters and appends a copy of each
 * resulting token to '*argv', which holds 'argc' entries on entry.  '*argv'
 * is updated as the array grows.
 *
 * Returns the new number of entries in '*argv'. */
static int
extra_dpdk_args(const char *ovs_extra_config, char ***argv, int argc)
{
    /* strtok_r() modifies its input, so tokenize a scratch copy. */
    char *scratch = xstrdup(ovs_extra_config);
    char *save_ptr = NULL;
    char *word = strtok_r(scratch, " ", &save_ptr);
    int n_args = argc;

    while (word != NULL) {
        *argv = grow_argv(argv, n_args, 1);
        (*argv)[n_args] = xstrdup(word);
        n_args++;

        word = strtok_r(NULL, " ", &save_ptr);
    }

    free(scratch);
    return n_args;
}
114
/* Returns true if any of the first 'argc_haystack' strings in
 * 'argv_haystack' is exactly equal to 'needle', false otherwise.
 *
 * 'argv_haystack' is not dereferenced when 'argc_haystack' is 0, so a NULL
 * array with a zero count is acceptable. */
static bool
argv_contains(char **argv_haystack, const size_t argc_haystack,
              const char *needle)
{
    for (size_t i = 0; i < argc_haystack; ++i) {
        if (!strcmp(argv_haystack[i], needle)) {
            return true;
        }
    }
    return false;
}
125
/* Appends to '*argv' the EAL options derived from the simple (non mutually
 * exclusive) keys of 'ovs_other_config'.  '*argv' holds 'initial_size'
 * entries on entry.
 *
 * An option that already appears among the 'extra_argc' strings of
 * 'extra_args' is not added; a warning is logged instead.
 *
 * Returns the new number of entries in '*argv'. */
static int
construct_dpdk_options(const struct smap *ovs_other_config,
                       char ***argv, const int initial_size,
                       char **extra_args, const size_t extra_argc)
{
    struct dpdk_options_map {
        const char *ovs_configuration;  /* Key in 'ovs_other_config'. */
        const char *dpdk_option;        /* Matching EAL switch. */
        bool default_enabled;           /* Use 'default_value' if unset? */
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask",   "-c",         false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
    };

    int n_args = initial_size;
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(opts); ++idx) {
        const struct dpdk_options_map *opt = &opts[idx];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            continue;
        }

        if (argv_contains(extra_args, extra_argc, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", opt->dpdk_option);
            continue;
        }

        dpdk_option_extend(argv, n_args, opt->dpdk_option, value);
        n_args += 2;
    }

    return n_args;
}
164
/* Capacity of the per-category option-name tables below. */
#define MAX_DPDK_EXCL_OPTS 10

/* Appends to '*argv' the EAL options for each category of mutually exclusive
 * settings.  '*argv' holds 'initial_size' entries on entry.
 *
 * For every category, the keys in 'ovs_other_config' are scanned in table
 * order and the last one with a non-empty value is used.  If more than one
 * is set an error is logged, but processing continues with that last one.
 * If none is set, the category's default is used when 'default_option' is
 * nonzero; otherwise the category is skipped.
 *
 * An option that already appears among the 'extra_argc' strings of
 * 'extra_args' is not added; a warning is logged instead.
 *
 * Returns the new number of entries in '*argv'. */
static int
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             char ***argv, const int initial_size,
                             char **extra_args, const size_t extra_argc)
{
    struct dpdk_exclusive_options_map {
        const char *category;
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;     /* Index into the tables; 0 means none. */
    } excl_opts[] = {
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m",             "--socket-mem",    NULL,},
         "1024,0", 1
        },
    };

    int n_args = initial_size;
    int cat;

    for (cat = 0; cat < ARRAY_SIZE(excl_opts); ++cat) {
        struct dpdk_exclusive_options_map *group = &excl_opts[cat];
        const char *chosen_value = NULL;
        const char *eal_option;
        int chosen = -1;
        int n_set = 0;
        int k;

        for (k = 0; k < MAX_DPDK_EXCL_OPTS; ++k) {
            const char *value;

            if (!group->ovs_dpdk_options[k]) {
                break;
            }

            value = smap_get(ovs_other_config, group->ovs_dpdk_options[k]);
            if (value && value[0] != '\0') {
                n_set++;
                chosen = k;
                chosen_value = value;
            }
        }

        if (n_set == 0) {
            if (!group->default_option) {
                continue;
            }
            chosen = group->default_option;
            chosen_value = group->default_value;
        } else if (n_set > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     group->category);
        }

        eal_option = group->eal_dpdk_options[chosen];
        if (argv_contains(extra_args, extra_argc, eal_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", eal_option);
        } else {
            dpdk_option_extend(argv, n_args, eal_option, chosen_value);
            n_args += 2;
        }
    }

    return n_args;
}
231
/* Builds the EAL argument list from 'ovs_other_config' and appends it to
 * '*argv', which holds 'argc' entries on entry.
 *
 * The free-form "dpdk-extra" value, if present, is tokenized first so that
 * the option builders can skip anything the user already specified there;
 * its tokens are then appended after the database-derived options.
 *
 * Returns the new number of entries in '*argv'. */
static int
get_dpdk_args(const struct smap *ovs_other_config, char ***argv,
              int argc)
{
    const char *extra_configuration;
    char **extra_args = NULL;
    size_t extra_argc = 0;
    int i;

    extra_configuration = smap_get(ovs_other_config, "dpdk-extra");
    if (extra_configuration) {
        extra_argc = extra_dpdk_args(extra_configuration, &extra_args, 0);
    }

    i = construct_dpdk_options(ovs_other_config, argv, argc, extra_args,
                               extra_argc);
    i = construct_dpdk_mutex_options(ovs_other_config, argv, i, extra_args,
                                     extra_argc);

    if (extra_configuration) {
        *argv = move_argv(argv, i, extra_args, extra_argc);
    }

    /* move_argv() transferred every string to '*argv' and cleared the
     * entries of 'extra_args', so only the (now empty) pointer array is left
     * to release.  free(NULL) is a no-op when no extra args were given. */
    free(extra_args);

    return i + extra_argc;
}
257
/* Releases an argument vector.
 *
 * Frees each of the first 'dpdk_argc' strings referenced by
 * 'dpdk_argv_release', then the 'dpdk_argv_release' array, then the
 * 'dpdk_argv' array.  The elements of 'dpdk_argv' are not freed
 * individually, so the two arrays are expected to reference the same
 * strings. */
static void
argv_release(char **dpdk_argv, char **dpdk_argv_release, size_t dpdk_argc)
{
    /* Index with size_t to match the type of 'dpdk_argc'. */
    for (size_t i = 0; i < dpdk_argc; ++i) {
        free(dpdk_argv_release[i]);
    }

    free(dpdk_argv_release);
    free(dpdk_argv);
}
269
736ca516
IM
270static ssize_t
271dpdk_log_write(void *c OVS_UNUSED, const char *buf, size_t size)
272{
273 char *str = xmemdup0(buf, size);
274
275 switch (rte_log_cur_msg_loglevel()) {
276 case RTE_LOG_DEBUG:
277 VLOG_DBG("%s", str);
278 break;
279 case RTE_LOG_INFO:
280 case RTE_LOG_NOTICE:
281 VLOG_INFO("%s", str);
282 break;
283 case RTE_LOG_WARNING:
284 VLOG_WARN("%s", str);
285 break;
286 case RTE_LOG_ERR:
287 VLOG_ERR("%s", str);
288 break;
289 case RTE_LOG_CRIT:
290 case RTE_LOG_ALERT:
291 case RTE_LOG_EMERG:
292 VLOG_EMER("%s", str);
293 break;
294 default:
295 OVS_NOT_REACHED();
296 }
297
298 free(str);
299 return size;
300}
301
302static cookie_io_functions_t dpdk_log_func = {
303 .write = dpdk_log_write,
304};
305
01961bbd
DDP
306static void
307dpdk_init__(const struct smap *ovs_other_config)
308{
71e2a07a 309 char **argv = NULL, **argv_to_release = NULL;
01961bbd
DDP
310 int result;
311 int argc, argc_tmp;
312 bool auto_determine = true;
313 int err = 0;
314 cpu_set_t cpuset;
315 char *sock_dir_subcomponent;
316
736ca516
IM
317 log_stream = fopencookie(NULL, "w+", dpdk_log_func);
318 if (log_stream == NULL) {
319 VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno));
320 } else {
321 setbuf(log_stream, NULL);
322 rte_openlog_stream(log_stream);
323 }
324
6c4f08e2 325 if (process_vhost_flags("vhost-sock-dir", ovs_rundir(),
01961bbd
DDP
326 NAME_MAX, ovs_other_config,
327 &sock_dir_subcomponent)) {
328 struct stat s;
329 if (!strstr(sock_dir_subcomponent, "..")) {
330 vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(),
331 sock_dir_subcomponent);
332
333 err = stat(vhost_sock_dir, &s);
334 if (err) {
335 VLOG_ERR("vhost-user sock directory '%s' does not exist.",
336 vhost_sock_dir);
337 }
338 } else {
339 vhost_sock_dir = xstrdup(ovs_rundir());
340 VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid"
341 "characters '..' - using %s instead.",
342 ovs_rundir(), sock_dir_subcomponent, ovs_rundir());
343 }
344 free(sock_dir_subcomponent);
345 } else {
346 vhost_sock_dir = sock_dir_subcomponent;
347 }
348
a14d1cc8
MK
349 vhost_iommu_enabled = smap_get_bool(ovs_other_config,
350 "vhost-iommu-support", false);
351 VLOG_INFO("IOMMU support for vhost-user-client %s.",
352 vhost_iommu_enabled ? "enabled" : "disabled");
353
01961bbd
DDP
354 argv = grow_argv(&argv, 0, 1);
355 argc = 1;
356 argv[0] = xstrdup(ovs_get_program_name());
357 argc_tmp = get_dpdk_args(ovs_other_config, &argv, argc);
358
359 while (argc_tmp != argc) {
360 if (!strcmp("-c", argv[argc]) || !strcmp("-l", argv[argc])) {
361 auto_determine = false;
362 break;
363 }
364 argc++;
365 }
366 argc = argc_tmp;
367
368 /**
369 * NOTE: This is an unsophisticated mechanism for determining the DPDK
370 * lcore for the DPDK Master.
371 */
372 if (auto_determine) {
373 int i;
374 /* Get the main thread affinity */
375 CPU_ZERO(&cpuset);
376 err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t),
377 &cpuset);
378 if (!err) {
379 for (i = 0; i < CPU_SETSIZE; i++) {
380 if (CPU_ISSET(i, &cpuset)) {
381 argv = grow_argv(&argv, argc, 2);
382 argv[argc++] = xstrdup("-c");
383 argv[argc++] = xasprintf("0x%08llX", (1ULL<<i));
384 i = CPU_SETSIZE;
385 }
386 }
387 } else {
388 VLOG_ERR("Thread getaffinity error %d. Using core 0x1", err);
389 /* User did not set dpdk-lcore-mask and unable to get current
390 * thread affintity - default to core 0x1 */
391 argv = grow_argv(&argv, argc, 2);
392 argv[argc++] = xstrdup("-c");
393 argv[argc++] = xasprintf("0x%X", 1);
394 }
395 }
396
397 argv = grow_argv(&argv, argc, 1);
398 argv[argc] = NULL;
399
400 optind = 1;
401
402 if (VLOG_IS_INFO_ENABLED()) {
403 struct ds eal_args;
404 int opt;
405 ds_init(&eal_args);
406 ds_put_cstr(&eal_args, "EAL ARGS:");
407 for (opt = 0; opt < argc; ++opt) {
408 ds_put_cstr(&eal_args, " ");
409 ds_put_cstr(&eal_args, argv[opt]);
410 }
411 VLOG_INFO("%s", ds_cstr_ro(&eal_args));
412 ds_destroy(&eal_args);
413 }
414
71e2a07a 415 argv_to_release = grow_argv(&argv_to_release, 0, argc);
fe11b9e0 416 for (argc_tmp = 0; argc_tmp < argc; ++argc_tmp) {
71e2a07a 417 argv_to_release[argc_tmp] = argv[argc_tmp];
fe11b9e0
AC
418 }
419
01961bbd
DDP
420 /* Make sure things are initialized ... */
421 result = rte_eal_init(argc, argv);
422 if (result < 0) {
423 ovs_abort(result, "Cannot init EAL");
424 }
71e2a07a 425 argv_release(argv, argv_to_release, argc);
01961bbd
DDP
426
427 /* Set the main thread affinity back to pre rte_eal_init() value */
428 if (auto_determine && !err) {
429 err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
430 &cpuset);
431 if (err) {
432 VLOG_ERR("Thread setaffinity error %d", err);
433 }
434 }
435
01961bbd
DDP
436 rte_memzone_dump(stdout);
437
438 /* We are called from the main thread here */
439 RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
440
441#ifdef DPDK_PDUMP
442 VLOG_INFO("DPDK pdump packet capture enabled");
443 err = rte_pdump_init(ovs_rundir());
444 if (err) {
445 VLOG_INFO("Error initialising DPDK pdump");
446 rte_pdump_uninit();
447 } else {
448 char *server_socket_path;
449
450 server_socket_path = xasprintf("%s/%s", ovs_rundir(),
451 "pdump_server_socket");
452 fatal_signal_add_file_to_unlink(server_socket_path);
453 free(server_socket_path);
454 }
455#endif
456
457 /* Finally, register the dpdk classes */
458 netdev_dpdk_register();
459}
460
461void
462dpdk_init(const struct smap *ovs_other_config)
463{
ec2b0701
DDP
464 static bool enabled = false;
465
466 if (enabled || !ovs_other_config) {
467 return;
468 }
469
470 if (smap_get_bool(ovs_other_config, "dpdk-init", false)) {
471 static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
01961bbd 472
ec2b0701
DDP
473 if (ovsthread_once_start(&once_enable)) {
474 VLOG_INFO("DPDK Enabled - initializing...");
475 dpdk_init__(ovs_other_config);
476 enabled = true;
477 VLOG_INFO("DPDK Enabled - initialized");
478 ovsthread_once_done(&once_enable);
479 }
480 } else {
5575908b 481 VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable");
01961bbd
DDP
482 }
483}
484
485const char *
486dpdk_get_vhost_sock_dir(void)
487{
488 return vhost_sock_dir;
489}
490
a14d1cc8
MK
491bool
492dpdk_vhost_iommu_enabled(void)
493{
494 return vhost_iommu_enabled;
495}
496
01961bbd
DDP
497void
498dpdk_set_lcore_id(unsigned cpu)
499{
500 /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
501 ovs_assert(cpu != NON_PMD_CORE_ID);
502 RTE_PER_LCORE(_lcore_id) = cpu;
503}