]>
Commit | Line | Data |
---|---|---|
01961bbd | 1 | /* |
5575908b | 2 | * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc. |
01961bbd DDP |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "dpdk.h" | |
19 | ||
736ca516 | 20 | #include <stdio.h> |
01961bbd DDP |
21 | #include <sys/types.h> |
22 | #include <sys/stat.h> | |
23 | #include <getopt.h> | |
24 | ||
d7e2509e | 25 | #include <rte_errno.h> |
736ca516 | 26 | #include <rte_log.h> |
01961bbd | 27 | #include <rte_memzone.h> |
40c23a57 | 28 | #include <rte_version.h> |
a0cbc627 CL |
29 | #ifdef DPDK_PDUMP |
30 | #include <rte_mempool.h> | |
31 | #include <rte_pdump.h> | |
32 | #endif | |
01961bbd DDP |
33 | |
34 | #include "dirs.h" | |
a0cbc627 | 35 | #include "fatal-signal.h" |
01961bbd DDP |
36 | #include "netdev-dpdk.h" |
37 | #include "openvswitch/dynamic-string.h" | |
38 | #include "openvswitch/vlog.h" | |
7189d54c | 39 | #include "ovs-numa.h" |
01961bbd | 40 | #include "smap.h" |
68c00e3e | 41 | #include "svec.h" |
3e52fa56 | 42 | #include "vswitch-idl.h" |
01961bbd DDP |
43 | |
44 | VLOG_DEFINE_THIS_MODULE(dpdk); | |
45 | ||
/* Stream for DPDK log redirection. */
static FILE *log_stream = NULL;

/* Location of vhost-user sockets. */
static char *vhost_sock_dir = NULL;

/* Status of vHost IOMMU support. */
static bool vhost_iommu_enabled = false;

/* Indicates successful initialization of DPDK. */
static bool dpdk_initialized = false;

/* Status of per port memory support. */
static bool per_port_memory = false;
01961bbd DDP |
53 | |
/* Looks up 'flag' in 'ovs_other_config' and stores a newly allocated copy of
 * the value to use in '*new_val'.  The caller owns that string and must
 * free() it.
 *
 * The configured value is used only when it is present and at most 'size'
 * characters long.  In every other case 'default_val' is used instead.
 *
 * Returns 1 if the user-provided value was taken, 0 if the default was. */
static int
process_vhost_flags(const char *flag, const char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    /* Compare lengths as 'size_t' only after ruling out a negative 'size',
     * which would otherwise convert to a huge unsigned limit. */
    if (val && size >= 0 && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    if (val) {
        /* A value was configured but it does not fit: say so, rather than
         * claiming that nothing was provided. */
        VLOG_WARN("User-provided %s is longer than %d characters - "
                  "defaulting to %s", flag, size, default_val);
    } else {
        VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    }
    *new_val = xstrdup(default_val);

    return 0;
}
78 | ||
01961bbd | 79 | static bool |
68c00e3e | 80 | args_contains(const struct svec *args, const char *value) |
01961bbd | 81 | { |
68c00e3e IM |
82 | const char *arg; |
83 | size_t i; | |
84 | ||
85 | /* We can't just use 'svec_contains' because args are not sorted. */ | |
86 | SVEC_FOR_EACH (i, arg, args) { | |
87 | if (!strcmp(arg, value)) { | |
01961bbd | 88 | return true; |
68c00e3e | 89 | } |
01961bbd DDP |
90 | } |
91 | return false; | |
92 | } | |
93 | ||
/* Appends to 'args' the EAL options that map one-to-one onto keys of
 * 'ovs_other_config'.
 *
 * An option that is already present in 'args' is left untouched; the
 * database value is then ignored and a warning is logged. */
static void
construct_dpdk_options(const struct smap *ovs_other_config, struct svec *args)
{
    /* Maps an other_config key onto the EAL option it controls.  When
     * 'default_enabled' is set, 'default_value' is used if the key is absent
     * from the database. */
    static const struct dpdk_options_map {
        const char *ovs_configuration;
        const char *dpdk_option;
        bool default_enabled;
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask",   "-c",         false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
    };
    size_t i;

    /* These are the flat (not mutually exclusive) options. */
    for (i = 0; i < ARRAY_SIZE(opts); i++) {
        const struct dpdk_options_map *opt = &opts[i];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            /* Neither configured nor defaulted: nothing to add. */
            continue;
        }

        if (args_contains(args, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk-extra config", opt->dpdk_option);
            continue;
        }

        svec_add(args, opt->dpdk_option);
        svec_add(args, value);
    }
}
128 | ||
7189d54c MR |
129 | static char * |
130 | construct_dpdk_socket_mem(void) | |
131 | { | |
7189d54c | 132 | const char *def_value = "1024"; |
9c68ca34 IM |
133 | int numa, numa_nodes = ovs_numa_get_n_numas(); |
134 | struct ds dpdk_socket_mem = DS_EMPTY_INITIALIZER; | |
7189d54c MR |
135 | |
136 | if (numa_nodes == 0 || numa_nodes == OVS_NUMA_UNSPEC) { | |
137 | numa_nodes = 1; | |
138 | } | |
7189d54c | 139 | |
9c68ca34 | 140 | ds_put_cstr(&dpdk_socket_mem, def_value); |
7189d54c | 141 | for (numa = 1; numa < numa_nodes; ++numa) { |
9c68ca34 | 142 | ds_put_format(&dpdk_socket_mem, ",%s", def_value); |
7189d54c MR |
143 | } |
144 | ||
9c68ca34 | 145 | return ds_cstr(&dpdk_socket_mem); |
7189d54c MR |
146 | } |
147 | ||
/* Upper bound on the number of alternatives in one group of mutually
 * exclusive options. */
#define MAX_DPDK_EXCL_OPTS 10

/* Appends to 'args' the EAL options that come from groups of mutually
 * exclusive keys of 'ovs_other_config'.
 *
 * Within a group, the last key (in table order) with a non-empty value is
 * used, and an error is logged if more than one key of the group is set.
 * If no key is set, the group's default is used when it has one.  An option
 * that is already present in 'args' is left untouched and a warning is
 * logged. */
static void
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             struct svec *args)
{
    char *default_dpdk_socket_mem = construct_dpdk_socket_mem();

    /* 'ovs_dpdk_options' and 'eal_dpdk_options' are parallel lists that end
     * at the first NULL entry.  'default_option' is the index of the
     * fallback entry; 0 means that the group has no default. */
    struct dpdk_exclusive_options_map {
        const char *category;
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;
    } excl_opts[] = {
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m",             "--socket-mem",    NULL,},
         default_dpdk_socket_mem, 1
        },
    };
    size_t i;

    for (i = 0; i < ARRAY_SIZE(excl_opts); i++) {
        const struct dpdk_exclusive_options_map *group = &excl_opts[i];
        const char *chosen_value = NULL;
        const char *eal_option;
        int n_set = 0;
        int chosen = -1;
        int k;

        /* Find every key of the group that is set; the last one wins. */
        for (k = 0; k < MAX_DPDK_EXCL_OPTS && group->ovs_dpdk_options[k];
             k++) {
            const char *value = smap_get(ovs_other_config,
                                         group->ovs_dpdk_options[k]);

            if (value && value[0] != '\0') {
                n_set++;
                chosen = k;
                chosen_value = value;
            }
        }

        if (n_set == 0) {
            if (!group->default_option) {
                continue;
            }
            chosen = group->default_option;
            chosen_value = group->default_value;
        } else if (n_set > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     group->category);
        }

        eal_option = group->eal_dpdk_options[chosen];
        if (args_contains(args, eal_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk-extra config", eal_option);
        } else {
            svec_add(args, eal_option);
            svec_add(args, chosen_value);
        }
    }

    free(default_dpdk_socket_mem);
}
213 | ||
/* Fills 'args' with the EAL arguments derived from 'ovs_other_config'.
 *
 * The words of the "dpdk-extra" value, if any, are added first.  The options
 * derived from the individual database keys are added afterwards. */
static void
construct_dpdk_args(const struct smap *ovs_other_config, struct svec *args)
{
    const char *extra = smap_get(ovs_other_config, "dpdk-extra");

    if (extra != NULL) {
        svec_parse_words(args, extra);
    }

    construct_dpdk_options(ovs_other_config, args);
    construct_dpdk_mutex_options(ovs_other_config, args);
}
226 | ||
736ca516 IM |
227 | static ssize_t |
228 | dpdk_log_write(void *c OVS_UNUSED, const char *buf, size_t size) | |
229 | { | |
230 | char *str = xmemdup0(buf, size); | |
9fd38f68 IM |
231 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(600, 600); |
232 | static struct vlog_rate_limit dbg_rl = VLOG_RATE_LIMIT_INIT(600, 600); | |
736ca516 IM |
233 | |
234 | switch (rte_log_cur_msg_loglevel()) { | |
235 | case RTE_LOG_DEBUG: | |
9fd38f68 | 236 | VLOG_DBG_RL(&dbg_rl, "%s", str); |
736ca516 IM |
237 | break; |
238 | case RTE_LOG_INFO: | |
239 | case RTE_LOG_NOTICE: | |
9fd38f68 | 240 | VLOG_INFO_RL(&rl, "%s", str); |
736ca516 IM |
241 | break; |
242 | case RTE_LOG_WARNING: | |
9fd38f68 | 243 | VLOG_WARN_RL(&rl, "%s", str); |
736ca516 IM |
244 | break; |
245 | case RTE_LOG_ERR: | |
9fd38f68 | 246 | VLOG_ERR_RL(&rl, "%s", str); |
736ca516 IM |
247 | break; |
248 | case RTE_LOG_CRIT: | |
249 | case RTE_LOG_ALERT: | |
250 | case RTE_LOG_EMERG: | |
251 | VLOG_EMER("%s", str); | |
252 | break; | |
253 | default: | |
254 | OVS_NOT_REACHED(); | |
255 | } | |
256 | ||
257 | free(str); | |
258 | return size; | |
259 | } | |
260 | ||
261 | static cookie_io_functions_t dpdk_log_func = { | |
262 | .write = dpdk_log_write, | |
263 | }; | |
264 | ||
d7e2509e | 265 | static bool |
01961bbd DDP |
266 | dpdk_init__(const struct smap *ovs_other_config) |
267 | { | |
68c00e3e IM |
268 | char *sock_dir_subcomponent; |
269 | char **argv = NULL; | |
01961bbd | 270 | int result; |
01961bbd DDP |
271 | bool auto_determine = true; |
272 | int err = 0; | |
273 | cpu_set_t cpuset; | |
68c00e3e | 274 | struct svec args = SVEC_EMPTY_INITIALIZER; |
01961bbd | 275 | |
736ca516 IM |
276 | log_stream = fopencookie(NULL, "w+", dpdk_log_func); |
277 | if (log_stream == NULL) { | |
278 | VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno)); | |
279 | } else { | |
280 | setbuf(log_stream, NULL); | |
281 | rte_openlog_stream(log_stream); | |
282 | } | |
283 | ||
6c4f08e2 | 284 | if (process_vhost_flags("vhost-sock-dir", ovs_rundir(), |
01961bbd DDP |
285 | NAME_MAX, ovs_other_config, |
286 | &sock_dir_subcomponent)) { | |
287 | struct stat s; | |
288 | if (!strstr(sock_dir_subcomponent, "..")) { | |
289 | vhost_sock_dir = xasprintf("%s/%s", ovs_rundir(), | |
290 | sock_dir_subcomponent); | |
291 | ||
292 | err = stat(vhost_sock_dir, &s); | |
293 | if (err) { | |
294 | VLOG_ERR("vhost-user sock directory '%s' does not exist.", | |
295 | vhost_sock_dir); | |
296 | } | |
297 | } else { | |
298 | vhost_sock_dir = xstrdup(ovs_rundir()); | |
299 | VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid" | |
300 | "characters '..' - using %s instead.", | |
301 | ovs_rundir(), sock_dir_subcomponent, ovs_rundir()); | |
302 | } | |
303 | free(sock_dir_subcomponent); | |
304 | } else { | |
305 | vhost_sock_dir = sock_dir_subcomponent; | |
306 | } | |
307 | ||
a14d1cc8 MK |
308 | vhost_iommu_enabled = smap_get_bool(ovs_other_config, |
309 | "vhost-iommu-support", false); | |
310 | VLOG_INFO("IOMMU support for vhost-user-client %s.", | |
311 | vhost_iommu_enabled ? "enabled" : "disabled"); | |
312 | ||
43307ad0 IS |
313 | per_port_memory = smap_get_bool(ovs_other_config, |
314 | "per-port-memory", false); | |
315 | VLOG_INFO("Per port memory for DPDK devices %s.", | |
316 | per_port_memory ? "enabled" : "disabled"); | |
317 | ||
68c00e3e IM |
318 | svec_add(&args, ovs_get_program_name()); |
319 | construct_dpdk_args(ovs_other_config, &args); | |
01961bbd | 320 | |
68c00e3e IM |
321 | if (args_contains(&args, "-c") || args_contains(&args, "-l")) { |
322 | auto_determine = false; | |
01961bbd | 323 | } |
01961bbd DDP |
324 | |
325 | /** | |
326 | * NOTE: This is an unsophisticated mechanism for determining the DPDK | |
327 | * lcore for the DPDK Master. | |
328 | */ | |
329 | if (auto_determine) { | |
68c00e3e IM |
330 | int cpu = 0; |
331 | ||
01961bbd DDP |
332 | /* Get the main thread affinity */ |
333 | CPU_ZERO(&cpuset); | |
334 | err = pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), | |
335 | &cpuset); | |
336 | if (!err) { | |
68c00e3e IM |
337 | for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { |
338 | if (CPU_ISSET(cpu, &cpuset)) { | |
339 | break; | |
01961bbd DDP |
340 | } |
341 | } | |
342 | } else { | |
01961bbd | 343 | /* User did not set dpdk-lcore-mask and unable to get current |
68c00e3e IM |
344 | * thread affintity - default to core #0 */ |
345 | VLOG_ERR("Thread getaffinity error %d. Using core #0", err); | |
01961bbd | 346 | } |
68c00e3e IM |
347 | svec_add(&args, "-l"); |
348 | svec_add_nocopy(&args, xasprintf("%d", cpu)); | |
01961bbd DDP |
349 | } |
350 | ||
68c00e3e | 351 | svec_terminate(&args); |
01961bbd DDP |
352 | |
353 | optind = 1; | |
354 | ||
355 | if (VLOG_IS_INFO_ENABLED()) { | |
68c00e3e IM |
356 | struct ds eal_args = DS_EMPTY_INITIALIZER; |
357 | char *joined_args = svec_join(&args, " ", "."); | |
358 | ||
359 | ds_put_format(&eal_args, "EAL ARGS: %s", joined_args); | |
01961bbd DDP |
360 | VLOG_INFO("%s", ds_cstr_ro(&eal_args)); |
361 | ds_destroy(&eal_args); | |
68c00e3e | 362 | free(joined_args); |
01961bbd DDP |
363 | } |
364 | ||
68c00e3e IM |
365 | /* Copy because 'rte_eal_init' will change the argv, i.e. it will remove |
366 | * some arguments from it. '+1' to copy the terminating NULL. */ | |
367 | argv = xmemdup(args.names, (args.n + 1) * sizeof args.names[0]); | |
fe11b9e0 | 368 | |
01961bbd | 369 | /* Make sure things are initialized ... */ |
68c00e3e IM |
370 | result = rte_eal_init(args.n, argv); |
371 | ||
372 | free(argv); | |
373 | svec_destroy(&args); | |
01961bbd DDP |
374 | |
375 | /* Set the main thread affinity back to pre rte_eal_init() value */ | |
376 | if (auto_determine && !err) { | |
377 | err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), | |
378 | &cpuset); | |
379 | if (err) { | |
380 | VLOG_ERR("Thread setaffinity error %d", err); | |
381 | } | |
382 | } | |
383 | ||
d7e2509e AC |
384 | if (result < 0) { |
385 | VLOG_EMER("Unable to initialize DPDK: %s", ovs_strerror(rte_errno)); | |
386 | return false; | |
387 | } | |
388 | ||
01961bbd DDP |
389 | rte_memzone_dump(stdout); |
390 | ||
391 | /* We are called from the main thread here */ | |
392 | RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; | |
393 | ||
394 | #ifdef DPDK_PDUMP | |
395 | VLOG_INFO("DPDK pdump packet capture enabled"); | |
396 | err = rte_pdump_init(ovs_rundir()); | |
397 | if (err) { | |
398 | VLOG_INFO("Error initialising DPDK pdump"); | |
399 | rte_pdump_uninit(); | |
400 | } else { | |
401 | char *server_socket_path; | |
402 | ||
403 | server_socket_path = xasprintf("%s/%s", ovs_rundir(), | |
404 | "pdump_server_socket"); | |
405 | fatal_signal_add_file_to_unlink(server_socket_path); | |
406 | free(server_socket_path); | |
407 | } | |
408 | #endif | |
409 | ||
410 | /* Finally, register the dpdk classes */ | |
411 | netdev_dpdk_register(); | |
d7e2509e | 412 | return true; |
01961bbd DDP |
413 | } |
414 | ||
415 | void | |
416 | dpdk_init(const struct smap *ovs_other_config) | |
417 | { | |
ec2b0701 DDP |
418 | static bool enabled = false; |
419 | ||
420 | if (enabled || !ovs_other_config) { | |
421 | return; | |
422 | } | |
423 | ||
3e52fa56 AC |
424 | const char *dpdk_init_val = smap_get_def(ovs_other_config, "dpdk-init", |
425 | "false"); | |
426 | ||
427 | bool try_only = !strcmp(dpdk_init_val, "try"); | |
428 | if (!strcmp(dpdk_init_val, "true") || try_only) { | |
ec2b0701 | 429 | static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER; |
01961bbd | 430 | |
ec2b0701 | 431 | if (ovsthread_once_start(&once_enable)) { |
40c23a57 | 432 | VLOG_INFO("Using %s", rte_version()); |
ec2b0701 | 433 | VLOG_INFO("DPDK Enabled - initializing..."); |
d7e2509e AC |
434 | enabled = dpdk_init__(ovs_other_config); |
435 | if (enabled) { | |
436 | VLOG_INFO("DPDK Enabled - initialized"); | |
3e52fa56 | 437 | } else if (!try_only) { |
d7e2509e AC |
438 | ovs_abort(rte_errno, "Cannot init EAL"); |
439 | } | |
ec2b0701 | 440 | ovsthread_once_done(&once_enable); |
d7e2509e AC |
441 | } else { |
442 | VLOG_ERR_ONCE("DPDK Initialization Failed."); | |
ec2b0701 DDP |
443 | } |
444 | } else { | |
5575908b | 445 | VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable"); |
01961bbd | 446 | } |
3e52fa56 | 447 | dpdk_initialized = enabled; |
01961bbd DDP |
448 | } |
449 | ||
450 | const char * | |
451 | dpdk_get_vhost_sock_dir(void) | |
452 | { | |
453 | return vhost_sock_dir; | |
454 | } | |
455 | ||
a14d1cc8 MK |
456 | bool |
457 | dpdk_vhost_iommu_enabled(void) | |
458 | { | |
459 | return vhost_iommu_enabled; | |
460 | } | |
461 | ||
43307ad0 IS |
462 | bool |
463 | dpdk_per_port_memory(void) | |
464 | { | |
465 | return per_port_memory; | |
466 | } | |
467 | ||
01961bbd DDP |
468 | void |
469 | dpdk_set_lcore_id(unsigned cpu) | |
470 | { | |
471 | /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */ | |
472 | ovs_assert(cpu != NON_PMD_CORE_ID); | |
473 | RTE_PER_LCORE(_lcore_id) = cpu; | |
474 | } | |
40c23a57 MC |
475 | |
/* Prints the string returned by rte_version() on stdout, followed by a
 * newline. */
void
print_dpdk_version(void)
{
    const char *version = rte_version();

    printf("%s\n", version);
}
3e52fa56 AC |
481 | |
482 | void | |
483 | dpdk_status(const struct ovsrec_open_vswitch *cfg) | |
484 | { | |
485 | if (cfg) { | |
486 | ovsrec_open_vswitch_set_dpdk_initialized(cfg, dpdk_initialized); | |
487 | ovsrec_open_vswitch_set_dpdk_version(cfg, rte_version()); | |
488 | } | |
489 | } |