2 * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include <sys/types.h>
26 #include <rte_memzone.h>
27 #include <rte_version.h>
29 #include <rte_mempool.h>
30 #include <rte_pdump.h>
34 #include "fatal-signal.h"
35 #include "netdev-dpdk.h"
36 #include "openvswitch/dynamic-string.h"
37 #include "openvswitch/vlog.h"
/* Module-wide state: registers this file with the vlog facility under the
 * name "dpdk" and defines three file-scope variables.  All three start out
 * as NULL/false and are assigned inside dpdk_init__(). */
40 VLOG_DEFINE_THIS_MODULE(dpdk
);
42 static FILE *log_stream
= NULL
; /* Stream for DPDK log redirection */
44 static char *vhost_sock_dir
= NULL
; /* Location of vhost-user sockets */
45 static bool vhost_iommu_enabled
= false; /* Status of vHost IOMMU support */
/* Looks up 'flag' in 'ovs_other_config' and stores a freshly xstrdup()'d
 * string through 'new_val': the configured value when one exists and its
 * length is at most 'size', otherwise a copy of 'default_val'.  Either
 * outcome is logged at INFO level.
 *
 * NOTE(review): a configured value that is longer than 'size' takes the same
 * path as a missing one and is therefore logged as "No %s provided".
 * NOTE(review): the declaration of 'new_val', the 'else' keyword and the
 * return statement are not visible in this listing.  The caller tests the
 * result in an 'if' -- presumably non-zero means the user value was used;
 * confirm. */
48 process_vhost_flags(char *flag
, const char *default_val
, int size
,
49 const struct smap
*ovs_other_config
,
55 val
= smap_get(ovs_other_config
, flag
);
57 /* Process the vhost-sock-dir flag if it is provided, otherwise resort to
60 if (val
&& (strlen(val
) <= size
)) {
62 *new_val
= xstrdup(val
);
63 VLOG_INFO("User-provided %s in use: %s", flag
, *new_val
);
65 VLOG_INFO("No %s provided - defaulting to %s", flag
, default_val
);
66 *new_val
= xstrdup(default_val
);
/* Returns the result of xrealloc()ing '*argv' to hold 'cur_siz' + 'grow_by'
 * 'char *' slots.  The visible body does not store the new pointer back into
 * '*argv'; the caller is expected to keep the returned value. */
73 grow_argv(char ***argv
, size_t cur_siz
, size_t grow_by
)
75 return xrealloc(*argv
, sizeof(char *) * (cur_siz
+ grow_by
));
/* Grows the argument vector by two slots with grow_argv() and fills them with
 * xstrdup()'d copies of 'option' (index 'argc') and 'value' (index
 * 'argc' + 1).
 *
 * NOTE(review): the declaration of 'value' and the statement between the
 * grow_argv() call and the first store are not visible in this listing;
 * presumably '*argv' is updated to the reallocated vector there -- confirm. */
79 dpdk_option_extend(char ***argv
, int argc
, const char *option
,
82 char **newargv
= grow_argv(argv
, argc
, 2);
84 newargv
[argc
] = xstrdup(option
);
85 newargv
[argc
+1] = xstrdup(value
);
/* Grows the vector behind 'argv' by 'src_argc' slots with grow_argv() and
 * transfers string pointers from 'src_argv' into it, offset by 'cur_size'.
 * Each transferred source slot is overwritten with NULL, presumably so the
 * strings end up with a single owner.
 *
 * NOTE(review): only the copy/clear statement pair is visible; the loop that
 * steps 'src_argc' and the return statement are not shown in this listing.
 * The caller assigns the result to '*argv'. */
89 move_argv(char ***argv
, size_t cur_size
, char **src_argv
, size_t src_argc
)
91 char **newargv
= grow_argv(argv
, cur_size
, src_argc
);
93 newargv
[cur_size
+src_argc
] = src_argv
[src_argc
];
94 src_argv
[src_argc
] = NULL
;
/* Tokenizes a private copy of 'ovs_extra_config' on single spaces with
 * strtok_r() and, for every token, grows the argument vector by one slot and
 * stores an xstrdup()'d copy of the token at index 'ret', which is then
 * incremented.
 *
 * NOTE(review): the declaration and initial value of 'ret', the update of
 * '*argv' after each grow, the release of 'release_tok' and the return
 * statement are not visible in this listing.  The caller stores the result in
 * 'extra_argc', so the resulting argument count is presumably returned --
 * confirm. */
100 extra_dpdk_args(const char *ovs_extra_config
, char ***argv
, int argc
)
103 char *release_tok
= xstrdup(ovs_extra_config
);
104 char *tok
, *endptr
= NULL
;
106 for (tok
= strtok_r(release_tok
, " ", &endptr
); tok
!= NULL
;
107 tok
= strtok_r(NULL
, " ", &endptr
)) {
108 char **newarg
= grow_argv(argv
, ret
, 1);
110 newarg
[ret
++] = xstrdup(tok
);
/* Walks the first 'argc_haystack' entries of 'argv_haystack' looking for a
 * string that compares equal (strcmp() == 0) to 'needle'.
 *
 * NOTE(review): the declaration of 'needle' and both return statements are
 * not visible in this listing; callers negate the result inside an 'if', so a
 * truth value is presumably returned -- confirm. */
117 argv_contains(char **argv_haystack
, const size_t argc_haystack
,
120 for (size_t i
= 0; i
< argc_haystack
; ++i
) {
121 if (!strcmp(argv_haystack
[i
], needle
))
/* Translates simple (non-exclusive) other_config keys into EAL command-line
 * options.
 *
 * Each entry of the local table maps a database key to an EAL option; the
 * entries visible here are "dpdk-lcore-mask" -> "-c" and "dpdk-hugepage-dir"
 * -> "--huge-dir".  For every entry the key is looked up in
 * 'ovs_other_config'; when it is absent and the entry has 'default_enabled'
 * set, 'default_value' is used instead.  The option/value pair is appended
 * with dpdk_option_extend() unless 'extra_args' (the user's "dpdk-extra"
 * tokens, 'extra_argc' of them) already contains the same option, in which
 * case a warning is logged.
 *
 * 'ret' starts at 'initial_size' and is the index passed to
 * dpdk_option_extend().
 *
 * NOTE(review): the table's variable declaration, the guard around the
 * append, the update of 'ret' and the return statement are not visible in
 * this listing.  The caller stores the result and passes it on as the next
 * helper's 'initial_size', so the new argument count is presumably returned
 * -- confirm. */
128 construct_dpdk_options(const struct smap
*ovs_other_config
,
129 char ***argv
, const int initial_size
,
130 char **extra_args
, const size_t extra_argc
)
132 struct dpdk_options_map
{
133 const char *ovs_configuration
;
134 const char *dpdk_option
;
135 bool default_enabled
;
136 const char *default_value
;
138 {"dpdk-lcore-mask", "-c", false, NULL
},
139 {"dpdk-hugepage-dir", "--huge-dir", false, NULL
},
142 int i
, ret
= initial_size
;
144 /* First, construct from the flat options (non-mutex). */
145 for (i
= 0; i
< ARRAY_SIZE(opts
); ++i
) {
146 const char *lookup
= smap_get(ovs_other_config
,
147 opts
[i
].ovs_configuration
);
148 if (!lookup
&& opts
[i
].default_enabled
) {
149 lookup
= opts
[i
].default_value
;
153 if (!argv_contains(extra_args
, extra_argc
, opts
[i
].dpdk_option
)) {
154 dpdk_option_extend(argv
, ret
, opts
[i
].dpdk_option
, lookup
);
157 VLOG_WARN("Ignoring database defined option '%s' due to "
158 "dpdk_extras config", opts
[i
].dpdk_option
);
/* Capacity of the per-group option arrays in the exclusive-options table
 * below; the scan loop also stops at the first NULL entry. */
166 #define MAX_DPDK_EXCL_OPTS 10
/* Handles groups of mutually exclusive other_config keys.
 *
 * Each table entry lists database keys in 'ovs_dpdk_options' and EAL options
 * in 'eal_dpdk_options'; both arrays are indexed by the same 'found_pos', so
 * they correspond by position.  The group visible here pairs
 * "dpdk-alloc-mem" with "-m" and "dpdk-socket-mem" with "--socket-mem".
 *
 * For every group each key is looked up in 'ovs_other_config' and a non-empty
 * value is remembered in 'found_value'.  A group default
 * ('default_option' / 'default_value') can supply the position and value
 * instead.  More than one match in a group is reported with VLOG_ERR.  The
 * selected EAL option is appended with dpdk_option_extend() unless
 * 'extra_args' already contains it, in which case a warning is logged.
 *
 * NOTE(review): 'default_option' is read through 'popt' but its declaration
 * is not visible in this listing, nor are the table's variable declaration,
 * the bookkeeping of 'found_opts' / 'found_pos', the conditions guarding the
 * default and append paths, what happens after the "Multiple defined
 * options" error, the update of 'ret', or the return statement -- confirm
 * against the full source. */
169 construct_dpdk_mutex_options(const struct smap
*ovs_other_config
,
170 char ***argv
, const int initial_size
,
171 char **extra_args
, const size_t extra_argc
)
173 struct dpdk_exclusive_options_map
{
174 const char *category
;
175 const char *ovs_dpdk_options
[MAX_DPDK_EXCL_OPTS
];
176 const char *eal_dpdk_options
[MAX_DPDK_EXCL_OPTS
];
177 const char *default_value
;
181 {"dpdk-alloc-mem", "dpdk-socket-mem", NULL
,},
182 {"-m", "--socket-mem", NULL
,},
187 int i
, ret
= initial_size
;
188 for (i
= 0; i
< ARRAY_SIZE(excl_opts
); ++i
) {
189 int found_opts
= 0, scan
, found_pos
= -1;
190 const char *found_value
;
191 struct dpdk_exclusive_options_map
*popt
= &excl_opts
[i
];
193 for (scan
= 0; scan
< MAX_DPDK_EXCL_OPTS
194 && popt
->ovs_dpdk_options
[scan
]; ++scan
) {
195 const char *lookup
= smap_get(ovs_other_config
,
196 popt
->ovs_dpdk_options
[scan
]);
197 if (lookup
&& strlen(lookup
)) {
200 found_value
= lookup
;
205 if (popt
->default_option
) {
206 found_pos
= popt
->default_option
;
207 found_value
= popt
->default_value
;
213 if (found_opts
> 1) {
214 VLOG_ERR("Multiple defined options for %s. Please check your"
215 " database settings and reconfigure if necessary.",
219 if (!argv_contains(extra_args
, extra_argc
,
220 popt
->eal_dpdk_options
[found_pos
])) {
221 dpdk_option_extend(argv
, ret
, popt
->eal_dpdk_options
[found_pos
],
225 VLOG_WARN("Ignoring database defined option '%s' due to "
226 "dpdk_extras config", popt
->eal_dpdk_options
[found_pos
]);
/* Assembles the EAL argument vector in '*argv' from 'ovs_other_config'.
 *
 * When "dpdk-extra" is set it is tokenized first into the separate
 * 'extra_args' vector, so that construct_dpdk_options() and
 * construct_dpdk_mutex_options() can skip any option the user already
 * supplied there.  Afterwards the extra tokens are moved into '*argv' with
 * move_argv().  Returns 'i' + 'extra_argc', where 'i' is the count produced
 * by the two construct_* helpers.
 *
 * NOTE(review): the final parameter ('argc', used below as the starting
 * count), the declaration of 'i' and the trailing 'extra_argc' arguments of
 * the helper calls are not visible in this listing. */
234 get_dpdk_args(const struct smap
*ovs_other_config
, char ***argv
,
237 const char *extra_configuration
;
238 char **extra_args
= NULL
;
240 size_t extra_argc
= 0;
242 extra_configuration
= smap_get(ovs_other_config
, "dpdk-extra");
243 if (extra_configuration
) {
244 extra_argc
= extra_dpdk_args(extra_configuration
, &extra_args
, 0);
247 i
= construct_dpdk_options(ovs_other_config
, argv
, argc
, extra_args
,
249 i
= construct_dpdk_mutex_options(ovs_other_config
, argv
, i
, extra_args
,
252 if (extra_configuration
) {
253 *argv
= move_argv(argv
, i
, extra_args
, extra_argc
);
256 return i
+ extra_argc
;
/* Frees each of the first 'dpdk_argc' strings referenced by
 * 'dpdk_argv_release' and then the 'dpdk_argv_release' array itself.
 *
 * NOTE(review): 'dpdk_argv' is not referenced by any line visible in this
 * listing, and the declaration of 'result' is not shown either; check the
 * full source for whether 'dpdk_argv' is released here as well. */
260 argv_release(char **dpdk_argv
, char **dpdk_argv_release
, size_t dpdk_argc
)
263 for (result
= 0; result
< dpdk_argc
; ++result
) {
264 free(dpdk_argv_release
[result
]);
267 free(dpdk_argv_release
);
/* Write hook for the DPDK log stream.  Copies the 'size' bytes at 'buf' into
 * a NUL-terminated string with xmemdup0() and forwards it to vlog at a level
 * selected by switching on rte_log_cur_msg_loglevel().  The cookie argument
 * 'c' is unused.
 *
 * NOTE(review): only the RTE_LOG_WARNING label and the INFO, WARN and EMER
 * calls are visible in this listing; the remaining case labels, the release
 * of 'str' and the return value are not shown. */
272 dpdk_log_write(void *c OVS_UNUSED
, const char *buf
, size_t size
)
274 char *str
= xmemdup0(buf
, size
);
276 switch (rte_log_cur_msg_loglevel()) {
282 VLOG_INFO("%s", str
);
284 case RTE_LOG_WARNING
:
285 VLOG_WARN("%s", str
);
293 VLOG_EMER("%s", str
);
/* I/O callback table passed to fopencookie() in dpdk_init__(); the visible
 * initializer sets only the write hook, to dpdk_log_write(). */
303 static cookie_io_functions_t dpdk_log_func
= {
304 .write
= dpdk_log_write
,
/* Performs the actual DPDK bring-up from 'ovs_other_config'.  In the order of
 * the visible statements:
 *
 *   - Opens a fopencookie() stream backed by dpdk_log_func, makes it
 *     unbuffered and hands it to rte_openlog_stream(); a failure to open it
 *     is logged.
 *   - Resolves the vhost-user socket directory.  A user-supplied
 *     "vhost-sock-dir" is treated as a sub-directory of ovs_rundir() and is
 *     stat()ed; a value containing ".." is refused and ovs_rundir() is used
 *     instead.
 *   - Reads "vhost-iommu-support" (default false) and logs the result.
 *   - Builds the EAL argument vector: program name first, then whatever
 *     get_dpdk_args() produces.  If none of those arguments is "-c" or "-l",
 *     a core is chosen from the calling thread's CPU affinity, or core 0x1
 *     when the affinity cannot be read.
 *   - Logs the arguments when INFO logging is enabled, keeps a copy of the
 *     pointers in 'argv_to_release', calls rte_eal_init(), and releases the
 *     vectors with argv_release().
 *   - Restores the thread affinity, dumps memzones to stdout, assigns
 *     NON_PMD_CORE_ID as this thread's lcore id, initializes pdump and finally
 *     registers the DPDK netdev classes.
 *
 * NOTE(review): many lines of this function (local declarations, several
 * 'if'/'else' guards, the ovs_abort() condition, preprocessor conditionals)
 * are not visible in this listing; the summary above covers only what is
 * shown. */
308 dpdk_init__(const struct smap
*ovs_other_config
)
310 char **argv
= NULL
, **argv_to_release
= NULL
;
313 bool auto_determine
= true;
316 char *sock_dir_subcomponent
;
318 log_stream
= fopencookie(NULL
, "w+", dpdk_log_func
);
319 if (log_stream
== NULL
) {
320 VLOG_ERR("Can't redirect DPDK log: %s.", ovs_strerror(errno
));
322 setbuf(log_stream
, NULL
);
323 rte_openlog_stream(log_stream
);
326 if (process_vhost_flags("vhost-sock-dir", ovs_rundir(),
327 NAME_MAX
, ovs_other_config
,
328 &sock_dir_subcomponent
)) {
330 if (!strstr(sock_dir_subcomponent
, "..")) {
331 vhost_sock_dir
= xasprintf("%s/%s", ovs_rundir(),
332 sock_dir_subcomponent
);
334 err
= stat(vhost_sock_dir
, &s
);
336 VLOG_ERR("vhost-user sock directory '%s' does not exist.",
340 vhost_sock_dir
= xstrdup(ovs_rundir());
/* The two literals below are concatenated by the compiler, so the first one
 * must end in a space to keep "invalid" and "characters" apart. */
341 VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid "
342 "characters '..' - using %s instead.",
343 ovs_rundir(), sock_dir_subcomponent
, ovs_rundir());
345 free(sock_dir_subcomponent
);
347 vhost_sock_dir
= sock_dir_subcomponent
;
350 vhost_iommu_enabled
= smap_get_bool(ovs_other_config
,
351 "vhost-iommu-support", false);
352 VLOG_INFO("IOMMU support for vhost-user-client %s.",
353 vhost_iommu_enabled
? "enabled" : "disabled");
355 argv
= grow_argv(&argv
, 0, 1);
357 argv
[0] = xstrdup(ovs_get_program_name());
358 argc_tmp
= get_dpdk_args(ovs_other_config
, &argv
, argc
);
360 while (argc_tmp
!= argc
) {
361 if (!strcmp("-c", argv
[argc
]) || !strcmp("-l", argv
[argc
])) {
362 auto_determine
= false;
370 * NOTE: This is an unsophisticated mechanism for determining the DPDK
371 * lcore for the DPDK Master.
373 if (auto_determine
) {
375 /* Get the main thread affinity */
377 err
= pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t
),
380 for (i
= 0; i
< CPU_SETSIZE
; i
++) {
381 if (CPU_ISSET(i
, &cpuset
)) {
382 argv
= grow_argv(&argv
, argc
, 2);
/* Pass the core as a core list ("-l <n>") rather than a "-c" bitmask: 'i' can
 * reach CPU_SETSIZE - 1, and shifting a 64-bit one by 64 or more is undefined,
 * so a mask cannot represent cores beyond 63. */
383 argv
[argc
++] = xstrdup("-l");
384 argv
[argc
++] = xasprintf("%d", (int) i
);
389 VLOG_ERR("Thread getaffinity error %d. Using core 0x1", err
);
390 /* User did not set dpdk-lcore-mask and unable to get current
391 * thread affinity - default to core 0x1 */
392 argv
= grow_argv(&argv
, argc
, 2);
393 argv
[argc
++] = xstrdup("-c");
394 argv
[argc
++] = xasprintf("0x%X", 1);
398 argv
= grow_argv(&argv
, argc
, 1);
403 if (VLOG_IS_INFO_ENABLED()) {
407 ds_put_cstr(&eal_args
, "EAL ARGS:");
408 for (opt
= 0; opt
< argc
; ++opt
) {
409 ds_put_cstr(&eal_args
, " ");
410 ds_put_cstr(&eal_args
, argv
[opt
]);
412 VLOG_INFO("%s", ds_cstr_ro(&eal_args
));
413 ds_destroy(&eal_args
);
416 argv_to_release
= grow_argv(&argv_to_release
, 0, argc
);
417 for (argc_tmp
= 0; argc_tmp
< argc
; ++argc_tmp
) {
418 argv_to_release
[argc_tmp
] = argv
[argc_tmp
];
421 /* Make sure things are initialized ... */
422 result
= rte_eal_init(argc
, argv
);
424 ovs_abort(result
, "Cannot init EAL");
426 argv_release(argv
, argv_to_release
, argc
);
428 /* Set the main thread affinity back to pre rte_eal_init() value */
429 if (auto_determine
&& !err
) {
430 err
= pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t
),
433 VLOG_ERR("Thread setaffinity error %d", err
);
437 rte_memzone_dump(stdout
);
439 /* We are called from the main thread here */
440 RTE_PER_LCORE(_lcore_id
) = NON_PMD_CORE_ID
;
443 VLOG_INFO("DPDK pdump packet capture enabled");
444 err
= rte_pdump_init(ovs_rundir());
446 VLOG_INFO("Error initialising DPDK pdump");
449 char *server_socket_path
;
451 server_socket_path
= xasprintf("%s/%s", ovs_rundir(),
452 "pdump_server_socket");
453 fatal_signal_add_file_to_unlink(server_socket_path
);
454 free(server_socket_path
);
458 /* Finally, register the dpdk classes */
459 netdev_dpdk_register();
/* Entry point for enabling DPDK from 'ovs_other_config'.
 *
 * The opening guard covers the cases where DPDK is already enabled (static
 * flag 'enabled') or 'ovs_other_config' is NULL.  When the boolean key
 * "dpdk-init" is true (default false), the DPDK version is logged and
 * dpdk_init__() is run inside an ovsthread_once section; otherwise a
 * "DPDK Disabled" hint is logged once.
 *
 * NOTE(review): the body of the opening guard (presumably an early return),
 * the statement that sets 'enabled' and the 'else' keyword are not visible
 * in this listing -- confirm. */
463 dpdk_init(const struct smap
*ovs_other_config
)
465 static bool enabled
= false;
467 if (enabled
|| !ovs_other_config
) {
471 if (smap_get_bool(ovs_other_config
, "dpdk-init", false)) {
472 static struct ovsthread_once once_enable
= OVSTHREAD_ONCE_INITIALIZER
;
474 if (ovsthread_once_start(&once_enable
)) {
475 VLOG_INFO("Using %s", rte_version());
476 VLOG_INFO("DPDK Enabled - initializing...");
477 dpdk_init__(ovs_other_config
);
479 VLOG_INFO("DPDK Enabled - initialized");
480 ovsthread_once_done(&once_enable
);
483 VLOG_INFO_ONCE("DPDK Disabled - Use other_config:dpdk-init to enable");
/* Returns the file-scope 'vhost_sock_dir' pointer: the vhost-user socket
 * directory assigned in dpdk_init__(), or NULL if it has not been assigned.
 * The pointer itself is returned, not a copy. */
488 dpdk_get_vhost_sock_dir(void)
490 return vhost_sock_dir
;
/* Returns the file-scope 'vhost_iommu_enabled' flag, which dpdk_init__() sets
 * from the "vhost-iommu-support" key and which is false until then. */
494 dpdk_vhost_iommu_enabled(void)
496 return vhost_iommu_enabled
;
/* Stores 'cpu' in RTE_PER_LCORE(_lcore_id) after asserting that it differs
 * from NON_PMD_CORE_ID.
 *
 * NOTE(review): RTE_PER_LCORE is a DPDK macro, presumably naming per-thread
 * storage, so this would affect only the calling thread -- confirm against
 * the DPDK headers. */
500 dpdk_set_lcore_id(unsigned cpu
)
502 /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
503 ovs_assert(cpu
!= NON_PMD_CORE_ID
);
504 RTE_PER_LCORE(_lcore_id
) = cpu
;
508 print_dpdk_version(void)