]> git.proxmox.com Git - ovs.git/blob - lib/dpdk.c
OF support and translation of generic encap and decap
[ovs.git] / lib / dpdk.c
1 /*
2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18 #include "dpdk.h"
19
20 #include <stdio.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <getopt.h>
24
25 #include <rte_log.h>
26 #include <rte_memzone.h>
27 #ifdef DPDK_PDUMP
28 #include <rte_mempool.h>
29 #include <rte_pdump.h>
30 #endif
31
32 #include "dirs.h"
33 #include "fatal-signal.h"
34 #include "netdev-dpdk.h"
35 #include "openvswitch/dynamic-string.h"
36 #include "openvswitch/vlog.h"
37 #include "smap.h"
38
39 VLOG_DEFINE_THIS_MODULE(dpdk);
40
41 static FILE *log_stream = NULL; /* Stream for DPDK log redirection */
42
43 static char *vhost_sock_dir = NULL; /* Location of vhost-user sockets */
44
/* Looks up 'flag' in 'ovs_other_config' and stores a newly allocated copy of
 * the value to use in '*new_val'.
 *
 * The configured value is used only when it is present and at most 'size'
 * characters long (a negative 'size' accepts nothing); otherwise
 * 'default_val' is used.  A value that is present but too long is reported
 * with a warning, so that it is not mistaken for a missing setting.
 *
 * Returns 1 if the configured value was used, 0 if the default was used.
 * '*new_val' is allocated with xstrdup() in both cases. */
static int
process_vhost_flags(char *flag, const char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    if (val && size >= 0 && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    if (val) {
        VLOG_WARN("User-provided %s is longer than %d characters - "
                  "defaulting to %s", flag, size, default_val);
    } else {
        VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    }
    *new_val = xstrdup(default_val);

    return 0;
}
69
/* Resizes the pointer array '*argv' so that it has room for
 * 'cur_siz' + 'grow_by' entries and returns the resized array.
 *
 * '*argv' itself is not updated; the caller must store the return value. */
static char **
grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
{
    size_t n_slots = cur_siz + grow_by;

    return xrealloc(*argv, n_slots * sizeof **argv);
}
75
/* Appends the pair 'option', 'value' to the argument vector '*argv', which
 * currently holds 'argc' entries.  Both strings are copied.  '*argv' is
 * updated to point to the resized array; the caller is responsible for
 * advancing its own count by two. */
static void
dpdk_option_extend(char ***argv, int argc, const char *option,
                   const char *value)
{
    *argv = grow_argv(argv, argc, 2);
    (*argv)[argc] = xstrdup(option);
    (*argv)[argc + 1] = xstrdup(value);
}
85
/* Transfers the 'src_argc' string pointers in 'src_argv' to the end of
 * '*argv', which currently holds 'cur_size' entries.
 *
 * Each transferred slot in 'src_argv' is set to NULL.  Returns the resized
 * destination array; '*argv' is not updated, so the caller must store the
 * return value. */
static char **
move_argv(char ***argv, size_t cur_size, char **src_argv, size_t src_argc)
{
    char **merged = grow_argv(argv, cur_size, src_argc);

    for (size_t i = 0; i < src_argc; i++) {
        merged[cur_size + i] = src_argv[i];
        src_argv[i] = NULL;
    }

    return merged;
}
96
/* Splits 'ovs_extra_config' on spaces and appends a copy of each token to
 * '*argv', which holds 'argc' entries on entry.  '*argv' is updated as the
 * array grows.
 *
 * Returns the number of entries in '*argv' after all tokens are appended. */
static int
extra_dpdk_args(const char *ovs_extra_config, char ***argv, int argc)
{
    /* strtok_r() modifies its input, so tokenize a private copy. */
    char *scratch = xstrdup(ovs_extra_config);
    char *save = NULL;
    char *word = strtok_r(scratch, " ", &save);
    int count = argc;

    while (word != NULL) {
        *argv = grow_argv(argv, count, 1);
        (*argv)[count++] = xstrdup(word);
        word = strtok_r(NULL, " ", &save);
    }

    free(scratch);
    return count;
}
113
/* Returns true if any of the first 'argc_haystack' strings in
 * 'argv_haystack' is exactly equal to 'needle', false otherwise. */
static bool
argv_contains(char **argv_haystack, const size_t argc_haystack,
              const char *needle)
{
    size_t pos = 0;

    while (pos < argc_haystack) {
        if (strcmp(argv_haystack[pos], needle) == 0) {
            return true;
        }
        pos++;
    }

    return false;
}
124
/* Appends EAL options derived from independent (non mutually exclusive)
 * 'ovs_other_config' keys to '*argv', which holds 'initial_size' entries.
 *
 * A database option is skipped, with a warning, when the same EAL option
 * already appears in 'extra_args' (of length 'extra_argc').
 *
 * Returns the number of entries in '*argv' afterwards. */
static int
construct_dpdk_options(const struct smap *ovs_other_config,
                       char ***argv, const int initial_size,
                       char **extra_args, const size_t extra_argc)
{
    struct dpdk_options_map {
        const char *ovs_configuration;  /* Key in other_config. */
        const char *dpdk_option;        /* Matching EAL option. */
        bool default_enabled;           /* Use 'default_value' if unset? */
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask",   "-c",         false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
    };
    int n_args = initial_size;

    for (size_t idx = 0; idx < ARRAY_SIZE(opts); ++idx) {
        const struct dpdk_options_map *opt = &opts[idx];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            continue;
        }

        if (argv_contains(extra_args, extra_argc, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", opt->dpdk_option);
            continue;
        }

        dpdk_option_extend(argv, n_args, opt->dpdk_option, value);
        n_args += 2;
    }

    return n_args;
}
163
/* Maximum number of alternatives in one group of exclusive options. */
#define MAX_DPDK_EXCL_OPTS 10

/* Appends EAL options for groups of mutually exclusive 'ovs_other_config'
 * keys to '*argv', which holds 'initial_size' entries.
 *
 * Within a group, the last non-empty key found wins; if more than one is
 * set an error is logged but processing continues.  If none is set, the
 * group's default is used when 'default_option' is nonzero.  The chosen
 * option is skipped, with a warning, when it already appears in
 * 'extra_args' (of length 'extra_argc').
 *
 * Returns the number of entries in '*argv' afterwards. */
static int
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             char ***argv, const int initial_size,
                             char **extra_args, const size_t extra_argc)
{
    struct dpdk_exclusive_options_map {
        const char *category;
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;     /* Index of the default; 0 means none. */
    } excl_opts[] = {
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m",             "--socket-mem",    NULL,},
         "1024,0", 1
        },
    };
    int n_args = initial_size;

    for (size_t grp = 0; grp < ARRAY_SIZE(excl_opts); ++grp) {
        const struct dpdk_exclusive_options_map *group = &excl_opts[grp];
        const char *chosen_value = NULL;
        const char *eal_option;
        int chosen = -1;
        int n_found = 0;
        int k;

        for (k = 0; k < MAX_DPDK_EXCL_OPTS && group->ovs_dpdk_options[k];
             ++k) {
            const char *cfg = smap_get(ovs_other_config,
                                       group->ovs_dpdk_options[k]);

            if (!cfg || cfg[0] == '\0') {
                continue;
            }
            n_found++;
            chosen = k;
            chosen_value = cfg;
        }

        if (n_found == 0) {
            if (!group->default_option) {
                continue;
            }
            chosen = group->default_option;
            chosen_value = group->default_value;
        } else if (n_found > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     group->category);
        }

        eal_option = group->eal_dpdk_options[chosen];
        if (argv_contains(extra_args, extra_argc, eal_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", eal_option);
        } else {
            dpdk_option_extend(argv, n_args, eal_option, chosen_value);
            n_args += 2;
        }
    }

    return n_args;
}
230
/* Builds the EAL argument vector from 'ovs_other_config'.
 *
 * On entry '*argv' holds 'argc' entries.  Options derived from individual
 * database keys are appended first, followed by the space-separated tokens
 * of the "dpdk-extra" key, if present.  A database-derived option is
 * dropped when the same option also appears in "dpdk-extra".
 *
 * Returns the total number of entries in '*argv' afterwards. */
static int
get_dpdk_args(const struct smap *ovs_other_config, char ***argv,
              int argc)
{
    const char *extra_configuration;
    char **extra_args = NULL;
    int i;
    size_t extra_argc = 0;

    extra_configuration = smap_get(ovs_other_config, "dpdk-extra");
    if (extra_configuration) {
        extra_argc = extra_dpdk_args(extra_configuration, &extra_args, 0);
    }

    i = construct_dpdk_options(ovs_other_config, argv, argc, extra_args,
                               extra_argc);
    i = construct_dpdk_mutex_options(ovs_other_config, argv, i, extra_args,
                                     extra_argc);

    if (extra_configuration) {
        *argv = move_argv(argv, i, extra_args, extra_argc);
    }

    /* move_argv() transferred ownership of the strings to '*argv' and
     * cleared the slots in 'extra_args', so only the now-empty pointer
     * array itself is left to release here. */
    free(extra_args);

    return i + extra_argc;
}
256
/* Frees the 'dpdk_argc' strings referenced by 'dpdk_argv_release', then
 * frees both the 'dpdk_argv_release' and 'dpdk_argv' arrays.
 *
 * The strings are released only through 'dpdk_argv_release'; the entries
 * of 'dpdk_argv' are never dereferenced. */
static void
argv_release(char **dpdk_argv, char **dpdk_argv_release, size_t dpdk_argc)
{
    size_t idx = 0;

    while (idx < dpdk_argc) {
        free(dpdk_argv_release[idx]);
        idx++;
    }

    free(dpdk_argv_release);
    free(dpdk_argv);
}
268
269 static ssize_t
270 dpdk_log_write(void *c OVS_UNUSED, const char *buf, size_t size)
271 {
272 char *str = xmemdup0(buf, size);
273
274 switch (rte_log_cur_msg_loglevel()) {
275 case RTE_LOG_DEBUG:
276 VLOG_DBG("%s", str);
277 break;
278 case RTE_LOG_INFO:
279 case RTE_LOG_NOTICE:
280 VLOG_INFO("%s", str);
281 break;
282 case RTE_LOG_WARNING:
283 VLOG_WARN("%s", str);
284 break;
285 case RTE_LOG_ERR:
286 VLOG_ERR("%s", str);
287 break;
288 case RTE_LOG_CRIT:
289 case RTE_LOG_ALERT:
290 case RTE_LOG_EMERG:
291 VLOG_EMER("%s", str);
292 break;
293 default:
294 OVS_NOT_REACHED();
295 }
296
297 free(str);
298 return size;
299 }
300
301 static cookie_io_functions_t dpdk_log_func = {
302 .write = dpdk_log_write,
303 };
304
305 static void
306 dpdk_init__(const struct smap *ovs_other_config)
307 {
308 char **argv = NULL, **argv_to_release = NULL;
309 int result;
310 int argc, argc_tmp;
311 bool auto_determine = true;
312 int err = 0;
313 cpu_set_t cpuset;
314 char *sock_dir_subcomponent;
315
316 log_stream = fopencookie(NULL, "w+", dpdk_log_func);
317 if (log_stream == NULL) {
318 VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno));
319 } else {
320 setbuf(log_stream, NULL);
321 rte_openlog_stream(log_stream);
322 }
323
324 if (process_vhost_flags("vhost-sock-dir", ovs_rundir(),
325 NAME_MAX, ovs_other_config,
326 &sock_dir_subcomponent)) {
327 struct stat s;
328 if (!strstr(sock_dir_subcomponent, "..")) {
329 vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(),
330 sock_dir_subcomponent);
331
332 err = stat(vhost_sock_dir, &s);
333 if (err) {
334 VLOG_ERR("vhost-user sock directory '%s' does not exist.",
335 vhost_sock_dir);
336 }
337 } else {
338 vhost_sock_dir = xstrdup(ovs_rundir());
339 VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid"
340 "characters '..' - using %s instead.",
341 ovs_rundir(), sock_dir_subcomponent, ovs_rundir());
342 }
343 free(sock_dir_subcomponent);
344 } else {
345 vhost_sock_dir = sock_dir_subcomponent;
346 }
347
348 argv = grow_argv(&argv, 0, 1);
349 argc = 1;
350 argv[0] = xstrdup(ovs_get_program_name());
351 argc_tmp = get_dpdk_args(ovs_other_config, &argv, argc);
352
353 while (argc_tmp != argc) {
354 if (!strcmp("-c", argv[argc]) || !strcmp("-l", argv[argc])) {
355 auto_determine = false;
356 break;
357 }
358 argc++;
359 }
360 argc = argc_tmp;
361
362 /**
363 * NOTE: This is an unsophisticated mechanism for determining the DPDK
364 * lcore for the DPDK Master.
365 */
366 if (auto_determine) {
367 int i;
368 /* Get the main thread affinity */
369 CPU_ZERO(&cpuset);
370 err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t),
371 &cpuset);
372 if (!err) {
373 for (i = 0; i < CPU_SETSIZE; i++) {
374 if (CPU_ISSET(i, &cpuset)) {
375 argv = grow_argv(&argv, argc, 2);
376 argv[argc++] = xstrdup("-c");
377 argv[argc++] = xasprintf("0x%08llX", (1ULL<<i));
378 i = CPU_SETSIZE;
379 }
380 }
381 } else {
382 VLOG_ERR("Thread getaffinity error %d. Using core 0x1", err);
383 /* User did not set dpdk-lcore-mask and unable to get current
384 * thread affintity - default to core 0x1 */
385 argv = grow_argv(&argv, argc, 2);
386 argv[argc++] = xstrdup("-c");
387 argv[argc++] = xasprintf("0x%X", 1);
388 }
389 }
390
391 argv = grow_argv(&argv, argc, 1);
392 argv[argc] = NULL;
393
394 optind = 1;
395
396 if (VLOG_IS_INFO_ENABLED()) {
397 struct ds eal_args;
398 int opt;
399 ds_init(&eal_args);
400 ds_put_cstr(&eal_args, "EAL ARGS:");
401 for (opt = 0; opt < argc; ++opt) {
402 ds_put_cstr(&eal_args, " ");
403 ds_put_cstr(&eal_args, argv[opt]);
404 }
405 VLOG_INFO("%s", ds_cstr_ro(&eal_args));
406 ds_destroy(&eal_args);
407 }
408
409 argv_to_release = grow_argv(&argv_to_release, 0, argc);
410 for (argc_tmp = 0; argc_tmp < argc; ++argc_tmp) {
411 argv_to_release[argc_tmp] = argv[argc_tmp];
412 }
413
414 /* Make sure things are initialized ... */
415 result = rte_eal_init(argc, argv);
416 if (result < 0) {
417 ovs_abort(result, "Cannot init EAL");
418 }
419 argv_release(argv, argv_to_release, argc);
420
421 /* Set the main thread affinity back to pre rte_eal_init() value */
422 if (auto_determine && !err) {
423 err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t),
424 &cpuset);
425 if (err) {
426 VLOG_ERR("Thread setaffinity error %d", err);
427 }
428 }
429
430 rte_memzone_dump(stdout);
431
432 /* We are called from the main thread here */
433 RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
434
435 #ifdef DPDK_PDUMP
436 VLOG_INFO("DPDK pdump packet capture enabled");
437 err = rte_pdump_init(ovs_rundir());
438 if (err) {
439 VLOG_INFO("Error initialising DPDK pdump");
440 rte_pdump_uninit();
441 } else {
442 char *server_socket_path;
443
444 server_socket_path = xasprintf("%s/%s", ovs_rundir(),
445 "pdump_server_socket");
446 fatal_signal_add_file_to_unlink(server_socket_path);
447 free(server_socket_path);
448 }
449 #endif
450
451 /* Finally, register the dpdk classes */
452 netdev_dpdk_register();
453 }
454
455 void
456 dpdk_init(const struct smap *ovs_other_config)
457 {
458 static bool enabled = false;
459
460 if (enabled || !ovs_other_config) {
461 return;
462 }
463
464 if (smap_get_bool(ovs_other_config, "dpdk-init", false)) {
465 static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
466
467 if (ovsthread_once_start(&once_enable)) {
468 VLOG_INFO("DPDK Enabled - initializing...");
469 dpdk_init__(ovs_other_config);
470 enabled = true;
471 VLOG_INFO("DPDK Enabled - initialized");
472 ovsthread_once_done(&once_enable);
473 }
474 } else {
475 VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable");
476 }
477 }
478
479 const char *
480 dpdk_get_vhost_sock_dir(void)
481 {
482 return vhost_sock_dir;
483 }
484
485 void
486 dpdk_set_lcore_id(unsigned cpu)
487 {
488 /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
489 ovs_assert(cpu != NON_PMD_CORE_ID);
490 RTE_PER_LCORE(_lcore_id) = cpu;
491 }