/*
 * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <getopt.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <rte_memzone.h>
#ifdef DPDK_PDUMP
#include <rte_mempool.h>
#include <rte_pdump.h>
#endif

#include "fatal-signal.h"
#include "netdev-dpdk.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(dpdk);

/* Directory that holds the vhost-user sockets.  Assigned by dpdk_init__()
 * and returned to callers by dpdk_get_vhost_sock_dir(). */
static char *vhost_sock_dir = NULL;
/* Looks up 'flag' in 'ovs_other_config' and stores the string to use in
 * '*new_val'.
 *
 * Returns 1 if the key is present and its value is at most 'size' characters
 * long; in that case '*new_val' is a fresh copy of the configured value.
 * Returns 0 otherwise, in which case '*new_val' is 'default_val' itself.
 *
 * Ownership: 'default_val' must be heap-allocated and is consumed by this
 * function.  It is either handed back through '*new_val' or freed here when
 * the configured value is used instead.  Either way the caller owns
 * '*new_val' and is responsible for freeing it. */
static int
process_vhost_flags(char *flag, char *default_val, int size,
                    const struct smap *ovs_other_config,
                    char **new_val)
{
    const char *val = smap_get(ovs_other_config, flag);

    /* Process the vhost-sock-dir flag if it is provided, otherwise resort to
     * default value.
     */
    if (val && size >= 0 && strlen(val) <= (size_t) size) {
        *new_val = xstrdup(val);
        /* The default is not needed on this path; release it so that it is
         * not leaked. */
        free(default_val);
        VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
        return 1;
    }

    VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
    *new_val = default_val;
    return 0;
}
/* Resizes the pointer array '*argv' to hold 'cur_siz' + 'grow_by' entries and
 * returns the resized array.
 *
 * '*argv' itself is not written; the caller must store the returned
 * pointer. */
static char **
grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
{
    size_t n_slots = cur_siz + grow_by;

    return xrealloc(*argv, sizeof(char *) * n_slots);
}
/* Appends copies of 'option' and 'value', in that order, to the argument
 * vector '*argv', which currently holds 'argc' entries.
 *
 * '*argv' is updated to the resized array.  The entry count is not tracked
 * here; the caller advances its own count by two. */
static void
dpdk_option_extend(char ***argv, int argc, const char *option,
                   const char *value)
{
    char **grown = grow_argv(argv, argc, 2);

    grown[argc] = xstrdup(option);
    grown[argc + 1] = xstrdup(value);
    *argv = grown;
}
/* Transfers the 'src_argc' string pointers in 'src_argv' to the end of the
 * argument vector '*argv', which currently holds 'cur_size' entries.
 *
 * The strings are moved, not copied: every transferred slot in 'src_argv' is
 * set to NULL.  Returns the resized destination array; '*argv' is not
 * written, so the caller must store the result. */
static char **
move_argv(char ***argv, size_t cur_size, char **src_argv, size_t src_argc)
{
    char **dst = grow_argv(argv, cur_size, src_argc);
    size_t i;

    for (i = 0; i < src_argc; i++) {
        dst[cur_size + i] = src_argv[i];
        src_argv[i] = NULL;
    }

    return dst;
}
/* Splits 'ovs_extra_config' on spaces and appends a copy of each token to
 * the argument vector '*argv', which currently holds 'argc' entries.
 *
 * '*argv' is updated as the vector grows.  Returns the new number of
 * entries. */
static int
extra_dpdk_args(const char *ovs_extra_config, char ***argv, int argc)
{
    /* strtok_r() modifies its input, so tokenize a private copy. */
    char *scratch = xstrdup(ovs_extra_config);
    char *save = NULL;
    char *word;
    int n = argc;

    word = strtok_r(scratch, " ", &save);
    while (word != NULL) {
        *argv = grow_argv(argv, n, 1);
        (*argv)[n++] = xstrdup(word);
        word = strtok_r(NULL, " ", &save);
    }

    free(scratch);
    return n;
}
/* Returns true if any of the first 'argc_haystack' strings in
 * 'argv_haystack' is exactly equal to 'needle', false otherwise.
 *
 * 'argv_haystack' is not dereferenced when 'argc_haystack' is 0, so a NULL
 * array with a zero count is accepted. */
static bool
argv_contains(char **argv_haystack, const size_t argc_haystack,
              const char *needle)
{
    for (size_t i = 0; i < argc_haystack; ++i) {
        if (!strcmp(argv_haystack[i], needle)) {
            return true;
        }
    }
    return false;
}
/* Translates the simple (non mutually exclusive) database options in
 * 'ovs_other_config' into EAL "option value" pairs appended to '*argv',
 * which holds 'initial_size' entries on entry.
 *
 * An option that also appears in 'extra_args' (the first 'extra_argc'
 * entries) is skipped with a warning, so the extra arguments take
 * precedence.  Returns the new number of entries in '*argv'. */
static int
construct_dpdk_options(const struct smap *ovs_other_config,
                       char ***argv, const int initial_size,
                       char **extra_args, const size_t extra_argc)
{
    struct dpdk_options_map {
        const char *ovs_configuration;  /* Key in 'ovs_other_config'. */
        const char *dpdk_option;        /* Matching EAL option. */
        bool default_enabled;           /* Use 'default_value' if unset? */
        const char *default_value;
    } opts[] = {
        {"dpdk-lcore-mask", "-c", false, NULL},
        {"dpdk-hugepage-dir", "--huge-dir", false, NULL},
    };

    int i, ret = initial_size;

    /* First, construct from the flat options (non-mutex). */
    for (i = 0; i < ARRAY_SIZE(opts); ++i) {
        const struct dpdk_options_map *opt = &opts[i];
        const char *value = smap_get(ovs_other_config,
                                     opt->ovs_configuration);

        if (!value && opt->default_enabled) {
            value = opt->default_value;
        }
        if (!value) {
            continue;
        }

        if (argv_contains(extra_args, extra_argc, opt->dpdk_option)) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", opt->dpdk_option);
            continue;
        }

        dpdk_option_extend(argv, ret, opt->dpdk_option, value);
        ret += 2;
    }

    return ret;
}
#define MAX_DPDK_EXCL_OPTS 10

/* Translates groups of mutually exclusive database options in
 * 'ovs_other_config' into EAL "option value" pairs appended to '*argv',
 * which holds 'initial_size' entries on entry.
 *
 * Within each group the last configured, non-empty option wins; an error is
 * logged if more than one is configured.  If none is configured, the group's
 * default is used when 'default_option' is nonzero, otherwise the group is
 * skipped.  An option that also appears in 'extra_args' (the first
 * 'extra_argc' entries) is skipped with a warning.
 *
 * Returns the new number of entries in '*argv'. */
static int
construct_dpdk_mutex_options(const struct smap *ovs_other_config,
                             char ***argv, const int initial_size,
                             char **extra_args, const size_t extra_argc)
{
    struct dpdk_exclusive_options_map {
        const char *category;
        const char *ovs_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *eal_dpdk_options[MAX_DPDK_EXCL_OPTS];
        const char *default_value;
        int default_option;     /* Index into the arrays; 0 means none. */
    } excl_opts[] = {
        /* NOTE(review): the category label and the default ("1024,0" at
         * index 1) should be confirmed against the project history. */
        {"memory type",
         {"dpdk-alloc-mem", "dpdk-socket-mem", NULL,},
         {"-m",             "--socket-mem",    NULL,},
         "1024,0", 1
        },
    };

    int i, ret = initial_size;

    for (i = 0; i < ARRAY_SIZE(excl_opts); ++i) {
        struct dpdk_exclusive_options_map *group = &excl_opts[i];
        const char *chosen_value = NULL;
        int n_found = 0;
        int chosen = -1;
        int scan;

        for (scan = 0; scan < MAX_DPDK_EXCL_OPTS
                 && group->ovs_dpdk_options[scan]; ++scan) {
            const char *configured = smap_get(ovs_other_config,
                                              group->ovs_dpdk_options[scan]);

            if (configured && configured[0] != '\0') {
                n_found++;
                chosen = scan;
                chosen_value = configured;
            }
        }

        if (n_found == 0) {
            if (!group->default_option) {
                continue;
            }
            chosen = group->default_option;
            chosen_value = group->default_value;
        }

        if (n_found > 1) {
            VLOG_ERR("Multiple defined options for %s. Please check your"
                     " database settings and reconfigure if necessary.",
                     group->category);
        }

        if (argv_contains(extra_args, extra_argc,
                          group->eal_dpdk_options[chosen])) {
            VLOG_WARN("Ignoring database defined option '%s' due to "
                      "dpdk_extras config", group->eal_dpdk_options[chosen]);
        } else {
            dpdk_option_extend(argv, ret, group->eal_dpdk_options[chosen],
                               chosen_value);
            ret += 2;
        }
    }

    return ret;
}
/* Builds the EAL arguments described by 'ovs_other_config' and appends them
 * to '*argv', which holds 'argc' entries on entry.
 *
 * Arguments derived from individual database options come first, followed by
 * the tokens of the free-form "dpdk-extra" setting.  Database options that
 * are also named in "dpdk-extra" are left out by the construct_* helpers.
 *
 * '*argv' is updated as the vector grows.  Returns the new number of
 * entries. */
static int
get_dpdk_args(const struct smap *ovs_other_config, char ***argv,
              int argc)
{
    const char *extra_configuration;
    char **extra_args = NULL;
    int i;
    size_t extra_argc = 0;

    extra_configuration = smap_get(ovs_other_config, "dpdk-extra");
    if (extra_configuration) {
        extra_argc = extra_dpdk_args(extra_configuration, &extra_args, 0);
    }

    i = construct_dpdk_options(ovs_other_config, argv, argc, extra_args,
                               extra_argc);
    i = construct_dpdk_mutex_options(ovs_other_config, argv, i, extra_args,
                                     extra_argc);

    if (extra_configuration) {
        *argv = move_argv(argv, i, extra_args, extra_argc);
        /* move_argv() transferred the strings themselves, so only the now
         * empty pointer array remains to be released. */
        free(extra_args);
    }

    return i + extra_argc;
}
/* Argument vector and count released by deferred_argv_release(). */
static char **dpdk_argv;
static int dpdk_argc;

/* Frees every string in 'dpdk_argv' and then the array itself.  Registered
 * with atexit() by dpdk_init__(). */
static void
deferred_argv_release(void)
{
    int idx = 0;

    while (idx < dpdk_argc) {
        free(dpdk_argv[idx]);
        idx++;
    }

    free(dpdk_argv);
}
268 dpdk_init__(const struct smap
*ovs_other_config
)
273 bool auto_determine
= true;
276 char *sock_dir_subcomponent
;
278 if (!smap_get_bool(ovs_other_config
, "dpdk-init", false)) {
279 VLOG_INFO("DPDK Disabled - to change this requires a restart.\n");
283 VLOG_INFO("DPDK Enabled, initializing");
284 if (process_vhost_flags("vhost-sock-dir", xstrdup(ovs_rundir()),
285 NAME_MAX
, ovs_other_config
,
286 &sock_dir_subcomponent
)) {
288 if (!strstr(sock_dir_subcomponent
, "..")) {
289 vhost_sock_dir
= xasprintf("%s/%s", ovs_rundir(),
290 sock_dir_subcomponent
);
292 err
= stat(vhost_sock_dir
, &s
);
294 VLOG_ERR("vhost-user sock directory '%s' does not exist.",
298 vhost_sock_dir
= xstrdup(ovs_rundir());
299 VLOG_ERR("vhost-user sock directory request '%s/%s' has invalid"
300 "characters '..' - using %s instead.",
301 ovs_rundir(), sock_dir_subcomponent
, ovs_rundir());
303 free(sock_dir_subcomponent
);
305 vhost_sock_dir
= sock_dir_subcomponent
;
308 argv
= grow_argv(&argv
, 0, 1);
310 argv
[0] = xstrdup(ovs_get_program_name());
311 argc_tmp
= get_dpdk_args(ovs_other_config
, &argv
, argc
);
313 while (argc_tmp
!= argc
) {
314 if (!strcmp("-c", argv
[argc
]) || !strcmp("-l", argv
[argc
])) {
315 auto_determine
= false;
323 * NOTE: This is an unsophisticated mechanism for determining the DPDK
324 * lcore for the DPDK Master.
326 if (auto_determine
) {
328 /* Get the main thread affinity */
330 err
= pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t
),
333 for (i
= 0; i
< CPU_SETSIZE
; i
++) {
334 if (CPU_ISSET(i
, &cpuset
)) {
335 argv
= grow_argv(&argv
, argc
, 2);
336 argv
[argc
++] = xstrdup("-c");
337 argv
[argc
++] = xasprintf("0x%08llX", (1ULL<<i
));
342 VLOG_ERR("Thread getaffinity error %d. Using core 0x1", err
);
343 /* User did not set dpdk-lcore-mask and unable to get current
344 * thread affintity - default to core 0x1 */
345 argv
= grow_argv(&argv
, argc
, 2);
346 argv
[argc
++] = xstrdup("-c");
347 argv
[argc
++] = xasprintf("0x%X", 1);
351 argv
= grow_argv(&argv
, argc
, 1);
356 if (VLOG_IS_INFO_ENABLED()) {
360 ds_put_cstr(&eal_args
, "EAL ARGS:");
361 for (opt
= 0; opt
< argc
; ++opt
) {
362 ds_put_cstr(&eal_args
, " ");
363 ds_put_cstr(&eal_args
, argv
[opt
]);
365 VLOG_INFO("%s", ds_cstr_ro(&eal_args
));
366 ds_destroy(&eal_args
);
369 /* Make sure things are initialized ... */
370 result
= rte_eal_init(argc
, argv
);
372 ovs_abort(result
, "Cannot init EAL");
375 /* Set the main thread affinity back to pre rte_eal_init() value */
376 if (auto_determine
&& !err
) {
377 err
= pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t
),
380 VLOG_ERR("Thread setaffinity error %d", err
);
387 atexit(deferred_argv_release
);
389 rte_memzone_dump(stdout
);
391 /* We are called from the main thread here */
392 RTE_PER_LCORE(_lcore_id
) = NON_PMD_CORE_ID
;
395 VLOG_INFO("DPDK pdump packet capture enabled");
396 err
= rte_pdump_init(ovs_rundir());
398 VLOG_INFO("Error initialising DPDK pdump");
401 char *server_socket_path
;
403 server_socket_path
= xasprintf("%s/%s", ovs_rundir(),
404 "pdump_server_socket");
405 fatal_signal_add_file_to_unlink(server_socket_path
);
406 free(server_socket_path
);
410 /* Finally, register the dpdk classes */
411 netdev_dpdk_register();
415 dpdk_init(const struct smap
*ovs_other_config
)
417 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
419 if (ovs_other_config
&& ovsthread_once_start(&once
)) {
420 dpdk_init__(ovs_other_config
);
421 ovsthread_once_done(&once
);
426 dpdk_get_vhost_sock_dir(void)
428 return vhost_sock_dir
;
432 dpdk_set_lcore_id(unsigned cpu
)
434 /* NON_PMD_CORE_ID is reserved for use by non pmd threads. */
435 ovs_assert(cpu
!= NON_PMD_CORE_ID
);
436 RTE_PER_LCORE(_lcore_id
) = cpu
;