]>
Commit | Line | Data |
---|---|---|
01961bbd | 1 | /* |
5575908b | 2 | * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc. |
01961bbd DDP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "dpdk.h" | |
19 | ||
736ca516 | 20 | #include <stdio.h> |
01961bbd DDP |
21 | #include <sys/types.h> |
22 | #include <sys/stat.h> | |
23 | #include <getopt.h> | |
24 | ||
736ca516 | 25 | #include <rte_log.h> |
01961bbd | 26 | #include <rte_memzone.h> |
a0cbc627 CL |
27 | #ifdef DPDK_PDUMP |
28 | #include <rte_mempool.h> | |
29 | #include <rte_pdump.h> | |
30 | #endif | |
01961bbd DDP |
31 | |
32 | #include "dirs.h" | |
a0cbc627 | 33 | #include "fatal-signal.h" |
01961bbd DDP |
34 | #include "netdev-dpdk.h" |
35 | #include "openvswitch/dynamic-string.h" | |
36 | #include "openvswitch/vlog.h" | |
37 | #include "smap.h" | |
38 | ||
VLOG_DEFINE_THIS_MODULE(dpdk);

/* Stream for DPDK log redirection; stays NULL until dpdk_init__() opens it. */
static FILE *log_stream = NULL;

/* Location of vhost-user sockets. */
static char *vhost_sock_dir = NULL;

/* Status of vHost IOMMU support. */
static bool vhost_iommu_enabled = false;
01961bbd DDP |
45 | |
/* Looks up 'flag' in 'ovs_other_config' and stores in '*new_val' a newly
 * allocated (xstrdup) copy of the value to use.
 *
 * The user-provided value is used only when it is present and at most 'size'
 * characters long; in every other case 'default_val' is used instead.
 *
 * Returns 1 if the user-provided value was taken, 0 if the default was. */
static int
process_vhost_flags(char *flag, const char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    /* 'size' is signed while strlen() is not: reject a negative limit
     * explicitly instead of letting it convert to a huge unsigned value. */
    if (val && size >= 0 && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    if (val) {
        /* A value was configured but rejected; say so rather than claiming
         * that nothing was provided. */
        VLOG_WARN("User-provided %s is longer than %d characters - "
                  "defaulting to %s", flag, size, default_val);
    } else {
        VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    }
    *new_val = xstrdup(default_val);

    return 0;
}
70 | ||
/* Resizes the pointer array '*argv' so that it can hold 'cur_siz' + 'grow_by'
 * entries and returns the resized array.  '*argv' itself is not updated; the
 * caller must store the returned pointer. */
static char **
grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
{
    size_t n_slots = cur_siz + grow_by;

    return xrealloc(*argv, n_slots * sizeof(char *));
}
76 | ||
/* Appends newly allocated copies of 'option' and 'value' to '*argv', which
 * currently holds 'argc' entries.  '*argv' is updated to point at the resized
 * array; the caller is responsible for adding 2 to its own entry count. */
static void
dpdk_option_extend(char ***argv, int argc, const char *option,
                   const char *value)
{
    *argv = grow_argv(argv, argc, 2);
    (*argv)[argc] = xstrdup(option);
    (*argv)[argc + 1] = xstrdup(value);
}
86 | ||
/* Grows '*argv' (currently 'cur_size' entries) by 'src_argc' slots and moves
 * the 'src_argc' pointers of 'src_argv' into the new tail.  Each moved slot in
 * 'src_argv' is set to NULL; the 'src_argv' array itself is left allocated.
 *
 * Returns the resized destination array.  '*argv' is not updated here, so the
 * caller must store the result. */
static char **
move_argv(char ***argv, size_t cur_size, char **src_argv, size_t src_argc)
{
    char **dst = grow_argv(argv, cur_size, src_argc);
    size_t i;

    for (i = 0; i < src_argc; i++) {
        dst[cur_size + i] = src_argv[i];
        src_argv[i] = NULL;
    }

    return dst;
}
97 | ||
/* Splits 'ovs_extra_config' on spaces and appends a newly allocated copy of
 * every token to '*argv', which currently holds 'argc' entries.  '*argv' is
 * updated each time the array is resized.
 *
 * Returns the new number of entries in '*argv'. */
static int
extra_dpdk_args(const char *ovs_extra_config, char ***argv, int argc)
{
    /* strtok_r() writes into its input, so tokenize a private copy. */
    char *scratch = xstrdup(ovs_extra_config);
    char *save_ptr = NULL;
    char *word = strtok_r(scratch, " ", &save_ptr);
    int n_args = argc;

    while (word) {
        *argv = grow_argv(argv, n_args, 1);
        (*argv)[n_args++] = xstrdup(word);
        word = strtok_r(NULL, " ", &save_ptr);
    }
    free(scratch);

    return n_args;
}
114 | ||
/* Returns true if any of the first 'argc_haystack' strings in 'argv_haystack'
 * is exactly equal to 'needle', false otherwise (including when the haystack
 * is empty). */
static bool
argv_contains(char **argv_haystack, const size_t argc_haystack,
              const char *needle)
{
    size_t idx = 0;

    while (idx < argc_haystack) {
        if (strcmp(argv_haystack[idx], needle) == 0) {
            return true;
        }
        idx++;
    }

    return false;
}
125 | ||
/* Translates the simple (non mutually exclusive) database options found in
 * 'ovs_other_config' into EAL command line options appended to '*argv', which
 * currently holds 'initial_size' entries.
 *
 * A database option is skipped, with a warning, when the corresponding EAL
 * option already appears among the 'extra_argc' strings in 'extra_args'.
 *
 * Returns the new number of entries in '*argv'. */
static int
construct_dpdk_options(const struct smap *ovs_other_config,
                       char ***argv, const int initial_size,
                       char **extra_args, const size_t extra_argc)
{
    struct dpdk_options_map {
        const char *ovs_configuration;  /* Key in 'ovs_other_config'. */
        const char *dpdk_option;        /* EAL option it maps to. */
        bool default_enabled;           /* Use 'default_value' if unset? */
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask",   "-c",         false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
    };
    int n_args = initial_size;
    int i;

    for (i = 0; i < ARRAY_SIZE(opts); ++i) {
        const struct dpdk_options_map *opt = &opts[i];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            continue;
        }

        if (argv_contains(extra_args, extra_argc, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", opt->dpdk_option);
            continue;
        }

        /* Each option contributes two argv entries: the flag and its value. */
        dpdk_option_extend(argv, n_args, opt->dpdk_option, value);
        n_args += 2;
    }

    return n_args;
}
164 | ||
#define MAX_DPDK_EXCL_OPTS 10

/* Translates groups of mutually exclusive database options found in
 * 'ovs_other_config' into EAL command line options appended to '*argv', which
 * currently holds 'initial_size' entries.
 *
 * For each group the last non-empty database option (in table order) is used;
 * an error is logged when more than one is set.  When none is set, the group's
 * default is used if 'default_option' is nonzero, otherwise the group is
 * skipped.  The selected option is dropped, with a warning, when the same EAL
 * option already appears among the 'extra_argc' strings in 'extra_args'.
 *
 * Returns the new number of entries in '*argv'. */
static int
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             char ***argv, const int initial_size,
                             char **extra_args, const size_t extra_argc)
{
    struct dpdk_exclusive_options_map {
        const char *category;
        /* Parallel, NULL-terminated lists: database key and the EAL option
         * it maps to. */
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;     /* Index into the lists; 0 means no default. */
    } excl_opts[] = {
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m",             "--socket-mem",    NULL,},
         "1024,0", 1
        },
    };
    int n_args = initial_size;
    int grp;

    for (grp = 0; grp < ARRAY_SIZE(excl_opts); ++grp) {
        struct dpdk_exclusive_options_map *group = &excl_opts[grp];
        const char *chosen_value;
        const char *eal_option;
        int n_configured = 0;
        int chosen = -1;
        int idx;

        for (idx = 0;
             idx < MAX_DPDK_EXCL_OPTS && group->ovs_dpdk_options[idx];
             ++idx) {
            const char *value = smap_get(ovs_other_config,
                                         group->ovs_dpdk_options[idx]);

            /* Empty strings count as "not configured". */
            if (value && *value) {
                n_configured++;
                chosen = idx;
                chosen_value = value;
            }
        }

        if (n_configured == 0) {
            if (!group->default_option) {
                continue;
            }
            chosen = group->default_option;
            chosen_value = group->default_value;
        } else if (n_configured > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     group->category);
        }

        eal_option = group->eal_dpdk_options[chosen];
        if (argv_contains(extra_args, extra_argc, eal_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", eal_option);
        } else {
            dpdk_option_extend(argv, n_args, eal_option, chosen_value);
            n_args += 2;
        }
    }

    return n_args;
}
231 | ||
/* Builds the EAL argument list from 'ovs_other_config' and appends it to
 * '*argv', which currently holds 'argc' entries.
 *
 * Arguments derived from the individual database options come first, then
 * the tokens of the free-form "dpdk-extra" setting, if any.  Options that
 * appear in "dpdk-extra" take precedence over their database counterparts.
 *
 * Returns the new number of entries in '*argv'. */
static int
get_dpdk_args(const struct smap *ovs_other_config, char ***argv,
              int argc)
{
    const char *extra_configuration;
    char **extra_args = NULL;
    int i;
    size_t extra_argc = 0;

    extra_configuration = smap_get(ovs_other_config, "dpdk-extra");
    if (extra_configuration) {
        extra_argc = extra_dpdk_args(extra_configuration, &extra_args, 0);
    }

    i = construct_dpdk_options(ovs_other_config, argv, argc, extra_args,
                               extra_argc);
    i = construct_dpdk_mutex_options(ovs_other_config, argv, i, extra_args,
                                     extra_argc);

    if (extra_configuration) {
        *argv = move_argv(argv, i, extra_args, extra_argc);
        /* move_argv() handed the strings over to '*argv' and cleared the
         * source slots; only the temporary pointer array is left to free. */
        free(extra_args);
    }

    return i + extra_argc;
}
257 | ||
/* Frees the 'dpdk_argc' strings referenced by 'dpdk_argv_release', then both
 * pointer arrays.  The strings are freed only through 'dpdk_argv_release';
 * the entries of 'dpdk_argv' are never dereferenced. */
static void
argv_release(char **dpdk_argv, char **dpdk_argv_release, size_t dpdk_argc)
{
    size_t i;

    for (i = 0; i < dpdk_argc; i++) {
        free(dpdk_argv_release[i]);
    }

    free(dpdk_argv_release);
    free(dpdk_argv);
}
269 | ||
736ca516 IM |
270 | static ssize_t |
271 | dpdk_log_write(void *c OVS_UNUSED, const char *buf, size_t size) | |
272 | { | |
273 | char *str = xmemdup0(buf, size); | |
274 | ||
275 | switch (rte_log_cur_msg_loglevel()) { | |
276 | case RTE_LOG_DEBUG: | |
277 | VLOG_DBG("%s", str); | |
278 | break; | |
279 | case RTE_LOG_INFO: | |
280 | case RTE_LOG_NOTICE: | |
281 | VLOG_INFO("%s", str); | |
282 | break; | |
283 | case RTE_LOG_WARNING: | |
284 | VLOG_WARN("%s", str); | |
285 | break; | |
286 | case RTE_LOG_ERR: | |
287 | VLOG_ERR("%s", str); | |
288 | break; | |
289 | case RTE_LOG_CRIT: | |
290 | case RTE_LOG_ALERT: | |
291 | case RTE_LOG_EMERG: | |
292 | VLOG_EMER("%s", str); | |
293 | break; | |
294 | default: | |
295 | OVS_NOT_REACHED(); | |
296 | } | |
297 | ||
298 | free(str); | |
299 | return size; | |
300 | } | |
301 | ||
302 | static cookie_io_functions_t dpdk_log_func = { | |
303 | .write = dpdk_log_write, | |
304 | }; | |
305 | ||
01961bbd DDP |
306 | static void |
307 | dpdk_init__(const struct smap *ovs_other_config) | |
308 | { | |
71e2a07a | 309 | char **argv = NULL, **argv_to_release = NULL; |
01961bbd DDP |
310 | int result; |
311 | int argc, argc_tmp; | |
312 | bool auto_determine = true; | |
313 | int err = 0; | |
314 | cpu_set_t cpuset; | |
315 | char *sock_dir_subcomponent; | |
316 | ||
736ca516 IM |
317 | log_stream = fopencookie(NULL, "w+", dpdk_log_func); |
318 | if (log_stream == NULL) { | |
319 | VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno)); | |
320 | } else { | |
321 | setbuf(log_stream, NULL); | |
322 | rte_openlog_stream(log_stream); | |
323 | } | |
324 | ||
6c4f08e2 | 325 | if (process_vhost_flags("vhost-sock-dir", ovs_rundir(), |
01961bbd DDP |
326 | NAME_MAX, ovs_other_config, |
327 | &sock_dir_subcomponent)) { | |
328 | struct stat s; | |
329 | if (!strstr(sock_dir_subcomponent, "..")) { | |
330 | vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(), | |
331 | sock_dir_subcomponent); | |
332 | ||
333 | err = stat(vhost_sock_dir, &s); | |
334 | if (err) { | |
335 | VLOG_ERR("vhost-user sock directory '%s' does not exist.", | |
336 | vhost_sock_dir); | |
337 | } | |
338 | } else { | |
339 | vhost_sock_dir = xstrdup(ovs_rundir()); | |
340 | VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid" | |
341 | "characters '..' - using %s instead.", | |
342 | ovs_rundir(), sock_dir_subcomponent, ovs_rundir()); | |
343 | } | |
344 | free(sock_dir_subcomponent); | |
345 | } else { | |
346 | vhost_sock_dir = sock_dir_subcomponent; | |
347 | } | |
348 | ||
a14d1cc8 MK |
349 | vhost_iommu_enabled = smap_get_bool(ovs_other_config, |
350 | "vhost-iommu-support", false); | |
351 | VLOG_INFO("IOMMU support for vhost-user-client %s.", | |
352 | vhost_iommu_enabled ? "enabled" : "disabled"); | |
353 | ||
01961bbd DDP |
354 | argv = grow_argv(&argv, 0, 1); |
355 | argc = 1; | |
356 | argv[0] = xstrdup(ovs_get_program_name()); | |
357 | argc_tmp = get_dpdk_args(ovs_other_config, &argv, argc); | |
358 | ||
359 | while (argc_tmp != argc) { | |
360 | if (!strcmp("-c", argv[argc]) || !strcmp("-l", argv[argc])) { | |
361 | auto_determine = false; | |
362 | break; | |
363 | } | |
364 | argc++; | |
365 | } | |
366 | argc = argc_tmp; | |
367 | ||
368 | /** | |
369 | * NOTE: This is an unsophisticated mechanism for determining the DPDK | |
370 | * lcore for the DPDK Master. | |
371 | */ | |
372 | if (auto_determine) { | |
373 | int i; | |
374 | /* Get the main thread affinity */ | |
375 | CPU_ZERO(&cpuset); | |
376 | err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), | |
377 | &cpuset); | |
378 | if (!err) { | |
379 | for (i = 0; i < CPU_SETSIZE; i++) { | |
380 | if (CPU_ISSET(i, &cpuset)) { | |
381 | argv = grow_argv(&argv, argc, 2); | |
382 | argv[argc++] = xstrdup("-c"); | |
383 | argv[argc++] = xasprintf("0x%08llX", (1ULL<<i)); | |
384 | i = CPU_SETSIZE; | |
385 | } | |
386 | } | |
387 | } else { | |
388 | VLOG_ERR("Thread getaffinity error %d. Using core 0x1", err); | |
389 | /* User did not set dpdk-lcore-mask and unable to get current | |
390 | * thread affintity - default to core 0x1 */ | |
391 | argv = grow_argv(&argv, argc, 2); | |
392 | argv[argc++] = xstrdup("-c"); | |
393 | argv[argc++] = xasprintf("0x%X", 1); | |
394 | } | |
395 | } | |
396 | ||
397 | argv = grow_argv(&argv, argc, 1); | |
398 | argv[argc] = NULL; | |
399 | ||
400 | optind = 1; | |
401 | ||
402 | if (VLOG_IS_INFO_ENABLED()) { | |
403 | struct ds eal_args; | |
404 | int opt; | |
405 | ds_init(&eal_args); | |
406 | ds_put_cstr(&eal_args, "EAL ARGS:"); | |
407 | for (opt = 0; opt < argc; ++opt) { | |
408 | ds_put_cstr(&eal_args, " "); | |
409 | ds_put_cstr(&eal_args, argv[opt]); | |
410 | } | |
411 | VLOG_INFO("%s", ds_cstr_ro(&eal_args)); | |
412 | ds_destroy(&eal_args); | |
413 | } | |
414 | ||
71e2a07a | 415 | argv_to_release = grow_argv(&argv_to_release, 0, argc); |
fe11b9e0 | 416 | for (argc_tmp = 0; argc_tmp < argc; ++argc_tmp) { |
71e2a07a | 417 | argv_to_release[argc_tmp] = argv[argc_tmp]; |
fe11b9e0 AC |
418 | } |
419 | ||
01961bbd DDP |
420 | /* Make sure things are initialized ... */ |
421 | result = rte_eal_init(argc, argv); | |
422 | if (result < 0) { | |
423 | ovs_abort(result, "Cannot init EAL"); | |
424 | } | |
71e2a07a | 425 | argv_release(argv, argv_to_release, argc); |
01961bbd DDP |
426 | |
427 | /* Set the main thread affinity back to pre rte_eal_init() value */ | |
428 | if (auto_determine && !err) { | |
429 | err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), | |
430 | &cpuset); | |
431 | if (err) { | |
432 | VLOG_ERR("Thread setaffinity error %d", err); | |
433 | } | |
434 | } | |
435 | ||
01961bbd DDP |
436 | rte_memzone_dump(stdout); |
437 | ||
438 | /* We are called from the main thread here */ | |
439 | RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; | |
440 | ||
441 | #ifdef DPDK_PDUMP | |
442 | VLOG_INFO("DPDK pdump packet capture enabled"); | |
443 | err = rte_pdump_init(ovs_rundir()); | |
444 | if (err) { | |
445 | VLOG_INFO("Error initialising DPDK pdump"); | |
446 | rte_pdump_uninit(); | |
447 | } else { | |
448 | char *server_socket_path; | |
449 | ||
450 | server_socket_path = xasprintf("%s/%s", ovs_rundir(), | |
451 | "pdump_server_socket"); | |
452 | fatal_signal_add_file_to_unlink(server_socket_path); | |
453 | free(server_socket_path); | |
454 | } | |
455 | #endif | |
456 | ||
457 | /* Finally, register the dpdk classes */ | |
458 | netdev_dpdk_register(); | |
459 | } | |
460 | ||
461 | void | |
462 | dpdk_init(const struct smap *ovs_other_config) | |
463 | { | |
ec2b0701 DDP |
464 | static bool enabled = false; |
465 | ||
466 | if (enabled || !ovs_other_config) { | |
467 | return; | |
468 | } | |
469 | ||
470 | if (smap_get_bool(ovs_other_config, "dpdk-init", false)) { | |
471 | static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER; | |
01961bbd | 472 | |
ec2b0701 DDP |
473 | if (ovsthread_once_start(&once_enable)) { |
474 | VLOG_INFO("DPDK Enabled - initializing..."); | |
475 | dpdk_init__(ovs_other_config); | |
476 | enabled = true; | |
477 | VLOG_INFO("DPDK Enabled - initialized"); | |
478 | ovsthread_once_done(&once_enable); | |
479 | } | |
480 | } else { | |
5575908b | 481 | VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable"); |
01961bbd DDP |
482 | } |
483 | } | |
484 | ||
485 | const char * | |
486 | dpdk_get_vhost_sock_dir(void) | |
487 | { | |
488 | return vhost_sock_dir; | |
489 | } | |
490 | ||
a14d1cc8 MK |
491 | bool |
492 | dpdk_vhost_iommu_enabled(void) | |
493 | { | |
494 | return vhost_iommu_enabled; | |
495 | } | |
496 | ||
01961bbd DDP |
497 | void |
498 | dpdk_set_lcore_id(unsigned cpu) | |
499 | { | |
500 | /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */ | |
501 | ovs_assert(cpu != NON_PMD_CORE_ID); | |
502 | RTE_PER_LCORE(_lcore_id) = cpu; | |
503 | } |