]> git.proxmox.com Git - mirror_ovs.git/blob - lib/dpdk.c
3f5a55fc14b411c2f4e0ee9fd316260ac6e1135a
[mirror_ovs.git] / lib / dpdk.c
1 /*
2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18 #include "dpdk.h"
19
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <getopt.h>
24
25 #include <rte_log.h>
26 #include <rte_memzone.h>
27 #include <rte_version.h>
28 #ifdef DPDK_PDUMP
29 #include <rte_mempool.h>
30 #include <rte_pdump.h>
31 #endif
32
33 #include "dirs.h"
34 #include "fatal-signal.h"
35 #include "netdev-dpdk.h"
36 #include "openvswitch/dynamic-string.h"
37 #include "openvswitch/vlog.h"
38 #include "smap.h"
39
40 VLOG_DEFINE_THIS_MODULE(dpdk);
41
/* Stream for DPDK log redirection.  Static storage starts out as NULL. */
static FILE *log_stream;

/* Location of vhost-user sockets.  Static storage starts out as NULL. */
static char *vhost_sock_dir;

/* Status of vHost IOMMU support.  Static storage starts out as false. */
static bool vhost_iommu_enabled;
46
/* Looks up 'flag' in 'ovs_other_config' and stores in '*new_val' a newly
 * allocated copy of the value to use: the configured value when one is
 * present and is at most 'size' characters long, otherwise 'default_val'.
 * The caller owns '*new_val' and is responsible for freeing it.
 *
 * Returns 1 if the user-provided value was taken, 0 if the default was. */
static int
process_vhost_flags(char *flag, const char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    if (val && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    if (val) {
        /* A value was configured but rejected: say so instead of claiming
         * that nothing was provided. */
        VLOG_WARN("User-provided %s is longer than %d characters - "
                  "defaulting to %s", flag, size, default_val);
    } else {
        VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    }
    *new_val = xstrdup(default_val);

    return 0;
}
71
/* Resizes the argument vector '*argv' so that it has room for
 * 'cur_siz' + 'grow_by' string pointers and returns the resized vector.
 *
 * '*argv' itself is not updated; the caller must store the returned
 * pointer. */
static char **
grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
{
    size_t n_slots = cur_siz + grow_by;
    char **resized = xrealloc(*argv, sizeof(char *) * n_slots);

    return resized;
}
77
/* Appends an option/value pair to the argument vector '*argv', which
 * currently holds 'argc' entries.  Copies of 'option' and 'value' are stored
 * at indexes 'argc' and 'argc' + 1, and '*argv' is updated to point to the
 * grown vector.  The caller is expected to advance its own count by two. */
static void
dpdk_option_extend(char ***argv, int argc, const char *option,
                   const char *value)
{
    char **grown = grow_argv(argv, argc, 2);

    grown[argc] = xstrdup(option);
    grown[argc + 1] = xstrdup(value);
    *argv = grown;
}
87
/* Moves the 'src_argc' string pointers in 'src_argv' to the end of '*argv',
 * which currently holds 'cur_size' entries.  Each moved slot in 'src_argv'
 * is reset to NULL, so the strings end up referenced by the destination
 * only.
 *
 * Returns the grown destination vector; '*argv' is not updated here. */
static char **
move_argv(char ***argv, size_t cur_size, char **src_argv, size_t src_argc)
{
    char **dst = grow_argv(argv, cur_size, src_argc);
    size_t i;

    for (i = 0; i < src_argc; i++) {
        dst[cur_size + i] = src_argv[i];
        src_argv[i] = NULL;
    }

    return dst;
}
98
/* Splits 'ovs_extra_config' on spaces and appends a copy of every token to
 * the argument vector '*argv', which currently holds 'argc' entries.
 * '*argv' is updated as the vector grows.
 *
 * Returns the new number of entries in '*argv'. */
static int
extra_dpdk_args(const char *ovs_extra_config, char ***argv, int argc)
{
    /* strtok_r() modifies its input, so tokenize a private copy. */
    char *scratch = xstrdup(ovs_extra_config);
    char *save_ptr = NULL;
    char *word = strtok_r(scratch, " ", &save_ptr);
    int n_args = argc;

    while (word) {
        *argv = grow_argv(argv, n_args, 1);
        (*argv)[n_args] = xstrdup(word);
        n_args++;
        word = strtok_r(NULL, " ", &save_ptr);
    }

    free(scratch);
    return n_args;
}
115
/* Returns true if any of the first 'argc_haystack' strings in
 * 'argv_haystack' is exactly equal to 'needle', otherwise false.
 *
 * 'argv_haystack' is not dereferenced when 'argc_haystack' is 0. */
static bool
argv_contains(char **argv_haystack, const size_t argc_haystack,
              const char *needle)
{
    size_t idx = 0;

    while (idx < argc_haystack) {
        if (strcmp(argv_haystack[idx], needle) == 0) {
            return true;
        }
        idx++;
    }

    return false;
}
126
/* Appends to '*argv' the EAL option/value pairs for the simple
 * (non-exclusive) settings found in 'ovs_other_config'.
 *
 * '*argv' currently holds 'initial_size' entries.  A setting whose EAL
 * option already appears among the 'extra_argc' strings in 'extra_args' is
 * skipped with a warning.
 *
 * Returns the new number of entries in '*argv'. */
static int
construct_dpdk_options(const struct smap *ovs_other_config,
                       char ***argv, const int initial_size,
                       char **extra_args, const size_t extra_argc)
{
    struct dpdk_options_map {
        const char *ovs_configuration;  /* Key in 'ovs_other_config'. */
        const char *dpdk_option;        /* Matching EAL option. */
        bool default_enabled;           /* Use 'default_value' if unset? */
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask", "-c", false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
    };

    int n_args = initial_size;
    int idx;

    for (idx = 0; idx < ARRAY_SIZE(opts); ++idx) {
        const struct dpdk_options_map *opt = &opts[idx];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            continue;
        }

        if (argv_contains(extra_args, extra_argc, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", opt->dpdk_option);
            continue;
        }

        dpdk_option_extend(argv, n_args, opt->dpdk_option, value);
        n_args += 2;
    }

    return n_args;
}
165
/* Capacity of the per-category option name tables below. */
#define MAX_DPDK_EXCL_OPTS 10

/* Appends to '*argv' the EAL option/value pair for each category of
 * mutually exclusive settings.
 *
 * For every category, all of its keys are looked up in 'ovs_other_config'.
 * The last key with a non-empty value wins; if more than one is set an error
 * is logged.  If none is set, the category's default is used when its
 * 'default_option' is nonzero, otherwise the category is skipped.  An option
 * that already appears among the 'extra_argc' strings in 'extra_args' is not
 * added and a warning is logged instead.
 *
 * '*argv' currently holds 'initial_size' entries.  Returns the new number of
 * entries. */
static int
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             char ***argv, const int initial_size,
                             char **extra_args, const size_t extra_argc)
{
    struct dpdk_exclusive_options_map {
        const char *category;
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;     /* Index of the default; 0 means none. */
    } excl_opts[] = {
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m", "--socket-mem", NULL,},
         "1024,0", 1
        },
    };

    int n_args = initial_size;
    int cat;

    for (cat = 0; cat < ARRAY_SIZE(excl_opts); ++cat) {
        struct dpdk_exclusive_options_map *group = &excl_opts[cat];
        const char *chosen_value = NULL;
        const char *eal_option;
        int n_found = 0;
        int chosen = -1;
        int j;

        for (j = 0; j < MAX_DPDK_EXCL_OPTS && group->ovs_dpdk_options[j];
             ++j) {
            const char *configured = smap_get(ovs_other_config,
                                              group->ovs_dpdk_options[j]);

            if (configured && strlen(configured)) {
                n_found++;
                chosen = j;
                chosen_value = configured;
            }
        }

        if (n_found == 0) {
            if (!group->default_option) {
                continue;
            }
            chosen = group->default_option;
            chosen_value = group->default_value;
        } else if (n_found > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     group->category);
        }

        eal_option = group->eal_dpdk_options[chosen];
        if (argv_contains(extra_args, extra_argc, eal_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", eal_option);
            continue;
        }

        dpdk_option_extend(argv, n_args, eal_option, chosen_value);
        n_args += 2;
    }

    return n_args;
}
232
/* Builds the EAL argument list from 'ovs_other_config' and appends it to
 * '*argv', which currently holds 'argc' entries.
 *
 * Options derived from individual database settings are added first,
 * followed by the space-separated tokens of "dpdk-extra", if configured.
 * The "dpdk-extra" tokens are parsed up front so that the construct_*()
 * helpers can skip database options that the user already passes there.
 *
 * Returns the new number of entries in '*argv'. */
static int
get_dpdk_args(const struct smap *ovs_other_config, char ***argv,
              int argc)
{
    const char *extra_configuration;
    char **extra_args = NULL;
    size_t extra_argc = 0;
    int i;

    extra_configuration = smap_get(ovs_other_config, "dpdk-extra");
    if (extra_configuration) {
        extra_argc = extra_dpdk_args(extra_configuration, &extra_args, 0);
    }

    i = construct_dpdk_options(ovs_other_config, argv, argc, extra_args,
                               extra_argc);
    i = construct_dpdk_mutex_options(ovs_other_config, argv, i, extra_args,
                                     extra_argc);

    if (extra_configuration) {
        *argv = move_argv(argv, i, extra_args, extra_argc);

        /* move_argv() transferred the strings into '*argv' and cleared the
         * slots in 'extra_args', so only the now-empty pointer array is left
         * to release.  free(NULL) is fine when there were no tokens. */
        free(extra_args);
    }

    return i + extra_argc;
}
258
259 static void
260 argv_release(char **dpdk_argv, char **dpdk_argv_release, size_t dpdk_argc)
261 {
262 int result;
263 for (result = 0; result < dpdk_argc; ++result) {
264 free(dpdk_argv_release[result]);
265 }
266
267 free(dpdk_argv_release);
268 free(dpdk_argv);
269 }
270
271 static ssize_t
272 dpdk_log_write(void *c OVS_UNUSED, const char *buf, size_t size)
273 {
274 char *str = xmemdup0(buf, size);
275
276 switch (rte_log_cur_msg_loglevel()) {
277 case RTE_LOG_DEBUG:
278 VLOG_DBG("%s", str);
279 break;
280 case RTE_LOG_INFO:
281 case RTE_LOG_NOTICE:
282 VLOG_INFO("%s", str);
283 break;
284 case RTE_LOG_WARNING:
285 VLOG_WARN("%s", str);
286 break;
287 case RTE_LOG_ERR:
288 VLOG_ERR("%s", str);
289 break;
290 case RTE_LOG_CRIT:
291 case RTE_LOG_ALERT:
292 case RTE_LOG_EMERG:
293 VLOG_EMER("%s", str);
294 break;
295 default:
296 OVS_NOT_REACHED();
297 }
298
299 free(str);
300 return size;
301 }
302
303 static cookie_io_functions_t dpdk_log_func = {
304 .write = dpdk_log_write,
305 };
306
307 static void
308 dpdk_init__(const struct smap *ovs_other_config)
309 {
310 char **argv = NULL, **argv_to_release = NULL;
311 int result;
312 int argc, argc_tmp;
313 bool auto_determine = true;
314 int err = 0;
315 cpu_set_t cpuset;
316 char *sock_dir_subcomponent;
317
318 log_stream = fopencookie(NULL, "w+", dpdk_log_func);
319 if (log_stream == NULL) {
320 VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno));
321 } else {
322 setbuf(log_stream, NULL);
323 rte_openlog_stream(log_stream);
324 }
325
326 if (process_vhost_flags("vhost-sock-dir", ovs_rundir(),
327 NAME_MAX, ovs_other_config,
328 &sock_dir_subcomponent)) {
329 struct stat s;
330 if (!strstr(sock_dir_subcomponent, "..")) {
331 vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(),
332 sock_dir_subcomponent);
333
334 err = stat(vhost_sock_dir, &s);
335 if (err) {
336 VLOG_ERR("vhost-user sock directory '%s' does not exist.",
337 vhost_sock_dir);
338 }
339 } else {
340 vhost_sock_dir = xstrdup(ovs_rundir());
341 VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid"
342 "characters '..' - using %s instead.",
343 ovs_rundir(), sock_dir_subcomponent, ovs_rundir());
344 }
345 free(sock_dir_subcomponent);
346 } else {
347 vhost_sock_dir = sock_dir_subcomponent;
348 }
349
350 vhost_iommu_enabled = smap_get_bool(ovs_other_config,
351 "vhost-iommu-support", false);
352 VLOG_INFO("IOMMU support for vhost-user-client %s.",
353 vhost_iommu_enabled ? "enabled" : "disabled");
354
355 argv = grow_argv(&argv, 0, 1);
356 argc = 1;
357 argv[0] = xstrdup(ovs_get_program_name());
358 argc_tmp = get_dpdk_args(ovs_other_config, &argv, argc);
359
360 while (argc_tmp != argc) {
361 if (!strcmp("-c", argv[argc]) || !strcmp("-l", argv[argc])) {
362 auto_determine = false;
363 break;
364 }
365 argc++;
366 }
367 argc = argc_tmp;
368
369 /**
370 * NOTE: This is an unsophisticated mechanism for determining the DPDK
371 * lcore for the DPDK Master.
372 */
373 if (auto_determine) {
374 int i;
375 /* Get the main thread affinity */
376 CPU_ZERO(&cpuset);
377 err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t),
378 &cpuset);
379 if (!err) {
380 for (i = 0; i < CPU_SETSIZE; i++) {
381 if (CPU_ISSET(i, &cpuset)) {
382 argv = grow_argv(&argv, argc, 2);
383 argv[argc++] = xstrdup("-c");
384 argv[argc++] = xasprintf("0x%08llX", (1ULL<<i));
385 i = CPU_SETSIZE;
386 }
387 }
388 } else {
389 VLOG_ERR("Thread getaffinity error %d. Using core 0x1", err);
390 /* User did not set dpdk-lcore-mask and unable to get current
391 * thread affintity - default to core 0x1 */
392 argv = grow_argv(&argv, argc, 2);
393 argv[argc++] = xstrdup("-c");
394 argv[argc++] = xasprintf("0x%X", 1);
395 }
396 }
397
398 argv = grow_argv(&argv, argc, 1);
399 argv[argc] = NULL;
400
401 optind = 1;
402
403 if (VLOG_IS_INFO_ENABLED()) {
404 struct ds eal_args;
405 int opt;
406 ds_init(&eal_args);
407 ds_put_cstr(&eal_args, "EAL ARGS:");
408 for (opt = 0; opt < argc; ++opt) {
409 ds_put_cstr(&eal_args, " ");
410 ds_put_cstr(&eal_args, argv[opt]);
411 }
412 VLOG_INFO("%s", ds_cstr_ro(&eal_args));
413 ds_destroy(&eal_args);
414 }
415
416 argv_to_release = grow_argv(&argv_to_release, 0, argc);
417 for (argc_tmp = 0; argc_tmp < argc; ++argc_tmp) {
418 argv_to_release[argc_tmp] = argv[argc_tmp];
419 }
420
421 /* Make sure things are initialized ... */
422 result = rte_eal_init(argc, argv);
423 if (result < 0) {
424 ovs_abort(result, "Cannot init EAL");
425 }
426 argv_release(argv, argv_to_release, argc);
427
428 /* Set the main thread affinity back to pre rte_eal_init() value */
429 if (auto_determine && !err) {
430 err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
431 &cpuset);
432 if (err) {
433 VLOG_ERR("Thread setaffinity error %d", err);
434 }
435 }
436
437 rte_memzone_dump(stdout);
438
439 /* We are called from the main thread here */
440 RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
441
442 #ifdef DPDK_PDUMP
443 VLOG_INFO("DPDK pdump packet capture enabled");
444 err = rte_pdump_init(ovs_rundir());
445 if (err) {
446 VLOG_INFO("Error initialising DPDK pdump");
447 rte_pdump_uninit();
448 } else {
449 char *server_socket_path;
450
451 server_socket_path = xasprintf("%s/%s", ovs_rundir(),
452 "pdump_server_socket");
453 fatal_signal_add_file_to_unlink(server_socket_path);
454 free(server_socket_path);
455 }
456 #endif
457
458 /* Finally, register the dpdk classes */
459 netdev_dpdk_register();
460 }
461
462 void
463 dpdk_init(const struct smap *ovs_other_config)
464 {
465 static bool enabled = false;
466
467 if (enabled || !ovs_other_config) {
468 return;
469 }
470
471 if (smap_get_bool(ovs_other_config, "dpdk-init", false)) {
472 static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
473
474 if (ovsthread_once_start(&once_enable)) {
475 VLOG_INFO("Using %s", rte_version());
476 VLOG_INFO("DPDK Enabled - initializing...");
477 dpdk_init__(ovs_other_config);
478 enabled = true;
479 VLOG_INFO("DPDK Enabled - initialized");
480 ovsthread_once_done(&once_enable);
481 }
482 } else {
483 VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable");
484 }
485 }
486
487 const char *
488 dpdk_get_vhost_sock_dir(void)
489 {
490 return vhost_sock_dir;
491 }
492
493 bool
494 dpdk_vhost_iommu_enabled(void)
495 {
496 return vhost_iommu_enabled;
497 }
498
/* Sets the calling thread's DPDK per-lcore '_lcore_id' variable to 'cpu'.
 *
 * 'cpu' must not be NON_PMD_CORE_ID; this is enforced with an assertion. */
void
dpdk_set_lcore_id(unsigned cpu)
{
    /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
    ovs_assert(cpu != NON_PMD_CORE_ID);
    RTE_PER_LCORE(_lcore_id) = cpu;
}
506
/* Prints the string returned by rte_version() to stdout, followed by a
 * newline. */
void
print_dpdk_version(void)
{
    const char *version = rte_version();

    puts(version);
}